DAG

library(DiagrammeR) 
# Nodes
 #node [shape = box]
 # S [label = 'Matched\n(S=1)',fontsize=7]
 # C [label = 'Not censored\n(C=0)',fontsize=7]
gr1<-
DiagrammeR::grViz("
digraph causal {

# Nodes
  node [shape = plaintext]
  a [label = 'Observed\nConfounders\n(Z)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Residential\nPrograms\n(X)',fontsize=10]

# Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB;
  
  b -> c 
  b -> a 
  a -> c  

  d -> c [minlen=1]
  d -> a [minlen=1]
  
 # a -> S #[minlen=1]
 # Z -> S #[minlen=1]
  
#  a -> C #[minlen=3]
#  Z -> C #[minlen=3]
  { rank = same; b; a; c }
# { rank = same; S; C }
  { rankdir = LR; a; d }

# Graph
  graph [overlap = true]
}")
gr1

Figure 1. Directed Acyclic Graph

#  {rank=same ; A -> B -> C -> D};
#       {rank=same ;           F -> E[dir=back]};
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
gr2<-
DiagrammeR::grViz("
digraph causal {

  # Nodes
  node [shape = plaintext]
  a [label = 'Residential\nPrograms\n(X)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Observed\nConfounders\n(Z)',fontsize=10]

  # Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB
  a -> c [minlen=3]
  d -> a [minlen=3]
  d -> c [minlen=9]
  
  b -> a [minlen=1]
  b -> c
  
{ rank = same; c; d }
#{ rank = same; b; d }
  rankdir = TB
{ rank = same; d; c } #Ver si lo saco, creo que da problemas
  
  # Graph
  graph [overlap = true]
}")#LR

Balance

We selected treatments at baseline for each user, leaving 85,048 observations. Then, we distinguished between residential 12,706 and ambulatory (72,267) treatments. We imputed cases that did not have a defined treatment assigned 75.


We selected the following variables of interest:

  • “Starting Substance” (sus_ini_mvv)
  • “Marital Status” (estado_conyugal_2)
  • “Educational Attainment” (escolaridad_rec)
  • “Age of Onset of Drug Use” (edad_ini_cons)
  • “Frequency of use of primary drug” (freq_cons_sus_prin)
  • “Motive of Admission to Treatment” (origen_ingreso_mod)
  • “Psychiatric co-morbidity” (dg_cie_10_rec)
  • “Drug Dependence” (dg_trs_cons_sus_or)
  • “Chilean Region of the Center” (nombre_region)
  • “Type of Center (Public)” (tipo_centro_pub)
  • “Sex” (sexo_2)
  • “Age at Admission to Treatment” (edad_al_ing)
  • “Date of Admission to Treatment” (fech_ing_num)
  • “Evaluation of the Therapeutic Process” (*) (evaluacindelprocesoteraputico)
  • “Early Dropout (Against Staff Advice)” (abandono_temprano_rec) (Y)
  • “Residential Type of Plan” (tipo_de_plan_res) (Z)


library(compareGroups)

match.on_tot <- c("row", "hash_key","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","abandono_temprano_rec","tipo_de_plan_res","duplicates_filtered","dg_trs_cons_sus_or","evaluacindelprocesoteraputico")
#dg_trs_cons_sus_or

CONS_C1_df_dup_SEP_2020_match<-
  CONS_C1_df_dup_SEP_2020 %>% 
  dplyr::filter(dup==1) %>% #, tipo_de_plan_2 %in% c("PG-PR","M-PR","PG-PAI","M-PAI","PG-PAB","M-PAB")
  dplyr::mutate(tipo_de_plan_res=dplyr::case_when(grepl("PR",as.character(tipo_de_plan_2))~1,
                                                  grepl("PAI",as.character(tipo_de_plan_2))~0,
                                                  grepl("PAB",as.character(tipo_de_plan_2))~0,
                                                  TRUE~NA_real_)) %>% 
  dplyr::mutate(tipo_de_plan_res=factor(tipo_de_plan_res)) %>% 
  dplyr::mutate(abandono_temprano_rec=factor(if_else(as.character(motivodeegreso_mod_imp)=="Early Drop-out",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(dg_trs_cons_sus_or=factor(if_else(as.character(dg_trs_cons_sus_or)=="Drug dependence",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(tipo_centro_pub=factor(if_else(as.character(tipo_centro)=="Public",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(condicion_ocupacional_corr=factor(condicion_ocupacional_corr),cat_ocupacional_corr=factor(cat_ocupacional_corr)) %>% 
  dplyr::mutate(dg_trs_fis_rec=factor(dplyr::case_when(as.character(diagnostico_trs_fisico)=="En estudio"~"Diagnosis unknown (under study)",as.character(diagnostico_trs_fisico)=="Sin trastorno"~'Without physical comorbidity',cnt_diagnostico_trs_fisico>0 ~'With physical comorbidity',
                                             TRUE~NA_character_)))%>%
    dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered=T,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%   
dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Did not use', 'Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
  dplyr::mutate(evaluacindelprocesoteraputico=dplyr::case_when(grepl("1",as.character(evaluacindelprocesoteraputico))~'1-High Achievement',grepl("2",as.character(evaluacindelprocesoteraputico))~'2-Medium Achievement',grepl("3",as.character(evaluacindelprocesoteraputico))~'3-Minimum Achievement', TRUE~as.character(evaluacindelprocesoteraputico))) %>% 
  dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
  dplyr::select_(.dots = match.on_tot) %>% 
  dplyr::mutate(more_one_treat=factor(ifelse(duplicates_filtered>1,1,0))) %>% 
  data.table::data.table()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
#CONS_C1_df_dup_SEP_2020_match %>% 
  #dplyr::group_by(dg_trs_fis) %>% dplyr::summarise(q1=quantile(dias_treat_imp_sin_na,.25),q2=quantile(dias_treat_imp_sin_na,.5),q3=quantile(dias_treat_imp_sin_na,.75)) ---> las distribuciones por días de tratamiento de las categorías de respuesta tienden a ser bastante similares, aunquequienes tienen una comorbiliad física definida tienen más tiempo en el estudio.
invisible("La diferencia en días de tratamiento entre las categorías de enfermedad psiquiátrica, indica que quienes se encuentran en estudio tienen muchos menos días en tratamiento que quienes no tienen una comorbilidad o quienes tienen una definida. No es lo mismo con el caso de la enfermedad física, en donde tienden a ser bastante similares")

invisible("Decidí no incluir diagnóstico de enferemedad física, porque hay algunas condiciones que son crónicas o que pueden serlo, y que no tengo cómo validarlas a lo largo del tratamiento")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

attr(CONS_C1_df_dup_SEP_2020_match$sus_ini_mod_mvv,"label")<-"Starting Substance"
attr(CONS_C1_df_dup_SEP_2020_match$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons,"label")<-"Age of Onset of Drug Use"
attr(CONS_C1_df_dup_SEP_2020_match$freq_cons_sus_prin,"label")<-"Frequency of use of primary drug"
attr(CONS_C1_df_dup_SEP_2020_match$origen_ingreso_mod,"label")<-"Motive of Admission to Treatment"
attr(CONS_C1_df_dup_SEP_2020_match$dg_cie_10_rec,"label")<-"Psychiatric co-morbidity"
attr(CONS_C1_df_dup_SEP_2020_match$nombre_region,"label")<-"Chilean Region of the Center"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_centro_pub,"label")<-"Type of Center (Public)"
attr(CONS_C1_df_dup_SEP_2020_match$sexo_2,"label")<-"Sex"
attr(CONS_C1_df_dup_SEP_2020_match$edad_al_ing,"label")<-"Age at Admission"
attr(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,"label")<-"Date of Admission to Treatment"
attr(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,"label")<-"Early Dropout"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res,"label")<-"Residential Type of Plan"
attr(CONS_C1_df_dup_SEP_2020_match$duplicates_filtered,"label")<-"No. of Treatments in the Database"
attr(CONS_C1_df_dup_SEP_2020_match$dg_trs_cons_sus_or,"label")<-"Drug Dependence"
attr(CONS_C1_df_dup_SEP_2020_match$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"

knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)

table1_all <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ duplicates_filtered+ dg_trs_cons_sus_or+ evaluacindelprocesoteraputico, method= c(
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            duplicates_filtered=3,
                                            evaluacindelprocesoteraputico=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T)
)
table1_more_one <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ dg_trs_cons_sus_or+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ evaluacindelprocesoteraputico, method= c(
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            evaluacindelprocesoteraputico=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T,
                       subset= more_one_treat==1)
)
table1_only_one <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ dg_trs_cons_sus_or+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ evaluacindelprocesoteraputico, method= c(
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            evaluacindelprocesoteraputico=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T,
                       subset= more_one_treat==0)
)
 #Possible values are: 1 - for analysis as "normal-distributed"; 2 - forces analysis as "continuous non-normal"; 3 - forces analysis as "categorical"; and 4 - NA, which performs a Shapiro-Wilks test to decide between normal or non-normal. 

restab1_all <- createTable(table1_all, show.p.overall = T)
restab1_more_one <- createTable(table1_more_one, show.p.overall = T)
restab1_only_one <- createTable(table1_only_one, show.p.overall = T)

pvals1 <- getResults(table1_all)
#p.adjust(pvals, method = "BH")
 export2md(restab1_all, size=11, first.strip=T, hide.no="no", position="center",
           format="html",caption= "Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019",col.names=c("Variables","Residential", "Ambulatory", "p-value"))%>%
  kableExtra::add_footnote(c("Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;", "Categorical variables are presented as number (%)"), notation = "none")%>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019
Variables Residential Ambulatory p-value
N=72267 N=12706
Starting Substance: 0.000
Alcohol 41507 (57.4%) 5080 (40.0%)
Cocaine hydrochloride 2682 (3.71%) 477 (3.75%)
Marijuana 18412 (25.5%) 4556 (35.9%)
Other 1669 (2.31%) 318 (2.50%)
Cocaine paste 2767 (3.83%) 1086 (8.55%)
‘Missing’ 5230 (7.24%) 1189 (9.36%)
Marital Status: <0.001
Married/Shared living arrangements 26185 (36.2%) 2910 (22.9%)
Separated/Divorced 7721 (10.7%) 1320 (10.4%)
Single 37343 (51.7%) 8328 (65.5%)
Widower 869 (1.20%) 133 (1.05%)
‘Missing’ 149 (0.21%) 15 (0.12%)
Educational Attainment: <0.001
3-Completed primary school or less 20062 (27.8%) 3862 (30.4%)
2-Completed high school or less 39565 (54.7%) 7044 (55.4%)
1-More than high school 12279 (17.0%) 1777 (14.0%)
‘Missing’ 361 (0.50%) 23 (0.18%)
Frequency of use of primary drug: 0.000
Did not use 1095 (1.52%) 85 (0.67%)
Less than 1 day a week 2862 (3.96%) 133 (1.05%)
2 to 3 days a week 22372 (31.0%) 1329 (10.5%)
4 to 6 days a week 12258 (17.0%) 1654 (13.0%)
1 day a week or more 5335 (7.38%) 272 (2.14%)
Daily 27938 (38.7%) 9219 (72.6%)
‘Missing’ 407 (0.56%) 14 (0.11%)
Motive of Admission to Treatment: 0.000
Spontaneous 33720 (46.7%) 4273 (33.6%)
Assisted Referral 4950 (6.85%) 3013 (23.7%)
Other 3766 (5.21%) 740 (5.82%)
Justice Sector 7159 (9.91%) 812 (6.39%)
Health Sector 22672 (31.4%) 3868 (30.4%)
Psychiatric co-morbidity: <0.001
Without psychiatric comorbidity 29070 (40.2%) 3245 (25.5%)
Diagnosis unknown (under study) 13310 (18.4%) 2771 (21.8%)
With psychiatric comorbidity 29887 (41.4%) 6690 (52.7%)
Type of Center (Public): 0.000
FALSE 14964 (20.7%) 9066 (71.4%)
TRUE 57300 (79.3%) 3623 (28.5%)
‘Missing’ 3 (0.00%) 17 (0.13%)
Sex: <0.001
Men 54806 (75.8%) 8761 (69.0%)
Women 17461 (24.2%) 3945 (31.0%)
Drug Dependence: 0.000
FALSE 22150 (30.7%) 1049 (8.26%)
TRUE 50116 (69.3%) 11657 (91.7%)
‘Missing’ 1 (0.00%) 0 (0.00%)
Age at Admission 34.5 [27.6;43.5] 32.6 [26.3;40.9] <0.001
Date of Admission to Treatment 16577 [15730;17359] 16154 [15342;17023] <0.001
Early Dropout: <0.001
FALSE 61074 (84.5%) 10201 (80.3%)
TRUE 11190 (15.5%) 2499 (19.7%)
‘Missing’ 3 (0.00%) 6 (0.05%)
No. of Treatments in the Database: .
1 58708 (81.2%) 8533 (67.2%)
2 10087 (14.0%) 2804 (22.1%)
3 2471 (3.42%) 927 (7.30%)
4 714 (0.99%) 295 (2.32%)
5 192 (0.27%) 94 (0.74%)
6 67 (0.09%) 36 (0.28%)
7 23 (0.03%) 11 (0.09%)
8 4 (0.01%) 6 (0.05%)
10 1 (0.00%) 0 (0.00%)
Drug Dependence: 0.000
FALSE 22150 (30.7%) 1049 (8.26%)
TRUE 50116 (69.3%) 11657 (91.7%)
‘Missing’ 1 (0.00%) 0 (0.00%)
Evaluation of the Therapeutic Process: <0.001
1-High Achievement 14081 (19.5%) 2831 (22.3%)
2-Medium Achievement 21728 (30.1%) 4237 (33.3%)
3-Minimum Achievement 31549 (43.7%) 5302 (41.7%)
‘Missing’ 4909 (6.79%) 336 (2.64%)
Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;
Categorical variables are presented as number (%)


Of the 85,048 users, we selected 85,048 that fulfilled the conditions stated above (100%).


#Additionally, we generated a correlation plot to get an overview of heterogeneous correlations between the different variables.

#<br>
require(polycor)
#Corresponde a la apreciación clínica que hace el equipo o profesional tratante, la persona en tratamiento y su familia, del nivel alcanzado de logro de los objetivos terapéuticos planteados al inicio del proceso y descritos en el plan de tratamiento personalizado. Los criterios incluyen la evaluación del estado clínico y psicosocial al momento del egreso y una apreciación pronostica del equipo tratante.

#Computes a heterogenous correlation matrix, consisting of Pearson product-moment correlations between numeric variables, polyserial correlations between numeric and ordinal variables, and polychoric correlations between 
tiempo_antes_hetcor<-Sys.time()
hetcor_mat<-hetcor(CONS_C1_df_dup_SEP_2020_match[,-c("hash_key","row","more_one_treat","duplicates_filtered")], ML = T, std.err =T, use="pairwise.complete.obs", bins=3, pd=TRUE)
tiempo_despues_hetcor<-Sys.time()
tiempo_hetcor<-tiempo_despues_hetcor-tiempo_antes_hetcor

attr(hetcor_mat$correlations,"dimnames")[[2]][1]<-"Starting Substance"
attr(hetcor_mat$correlations,"dimnames")[[2]][2]<-"Marital Status"
attr(hetcor_mat$correlations,"dimnames")[[2]][3]<-"Educational Attainment"
attr(hetcor_mat$correlations,"dimnames")[[2]][4]<-"Age of Onset of Drug Use"
attr(hetcor_mat$correlations,"dimnames")[[2]][5]<-"Frequency of use of primary drug"
attr(hetcor_mat$correlations,"dimnames")[[2]][6]<-"Motive of Admission to Treatment"
attr(hetcor_mat$correlations,"dimnames")[[2]][7]<-"Psychiatric comorbidity"
#attr(hetcor_mat$correlations,"dimnames")[[2]][8]<-"Physical comorbidity"
attr(hetcor_mat$correlations,"dimnames")[[2]][8]<-"Chilean Region of the Center"
attr(hetcor_mat$correlations,"dimnames")[[2]][9]<-"Type of Center (Public)"
attr(hetcor_mat$correlations,"dimnames")[[2]][10]<-"Sex"
attr(hetcor_mat$correlations,"dimnames")[[2]][11]<-"Age at Admission"
attr(hetcor_mat$correlations,"dimnames")[[2]][12]<-"Date of Admission"
attr(hetcor_mat$correlations,"dimnames")[[2]][13]<-"Early Drop out"
attr(hetcor_mat$correlations,"dimnames")[[2]][14]<-"Residential Treatment"
attr(hetcor_mat$correlations,"dimnames")[[2]][15]<-"Drug Dependence"
attr(hetcor_mat$correlations,"dimnames")[[2]][16]<-"Evaluation of the Therapeutic Process"

attr(hetcor_mat$correlations,"dimnames")[[1]][1]<-"Starting Substance"
attr(hetcor_mat$correlations,"dimnames")[[1]][2]<-"Marital Status"
attr(hetcor_mat$correlations,"dimnames")[[1]][3]<-"Educational Attainment"
attr(hetcor_mat$correlations,"dimnames")[[1]][4]<-"Age of Onset of Drug Use"
attr(hetcor_mat$correlations,"dimnames")[[1]][5]<-"Frequency of use of primary drug"
attr(hetcor_mat$correlations,"dimnames")[[1]][6]<-"Motive of Admission to Treatment"
attr(hetcor_mat$correlations,"dimnames")[[1]][7]<-"Psychiatric comorbidity"
#attr(hetcor_mat$correlations,"dimnames")[[1]][8]<-"Physical comorbidity"
attr(hetcor_mat$correlations,"dimnames")[[1]][8]<-"Chilean Region of the Center"
attr(hetcor_mat$correlations,"dimnames")[[1]][9]<-"Type of Center (Public)"
attr(hetcor_mat$correlations,"dimnames")[[1]][10]<-"Sex"
attr(hetcor_mat$correlations,"dimnames")[[1]][11]<-"Age at Admission"
attr(hetcor_mat$correlations,"dimnames")[[1]][12]<-"Date of Admission"
attr(hetcor_mat$correlations,"dimnames")[[1]][13]<-"Early Drop out"
attr(hetcor_mat$correlations,"dimnames")[[1]][14]<-"Residential Treatment"
attr(hetcor_mat$correlations,"dimnames")[[1]][15]<-"Drug Dependence"
attr(hetcor_mat$correlations,"dimnames")[[1]][16]<-"Evaluation of the Therapeutic Process"

attr(hetcor_mat$tests,"dimnames")[[2]][1]<-"Starting Substance"
attr(hetcor_mat$tests,"dimnames")[[2]][2]<-"Marital Status"
attr(hetcor_mat$tests,"dimnames")[[2]][3]<-"Educational Attainment"
attr(hetcor_mat$tests,"dimnames")[[2]][4]<-"Age of Onset of Drug Use"
attr(hetcor_mat$tests,"dimnames")[[2]][5]<-"Frequency of use of primary drug"
attr(hetcor_mat$tests,"dimnames")[[2]][6]<-"Motive of Admission to Treatment"
attr(hetcor_mat$tests,"dimnames")[[2]][7]<-"Psychiatric comorbidity"
#attr(hetcor_mat$tests,"dimnames")[[2]][8]<-"Physical comorbidity"
attr(hetcor_mat$tests,"dimnames")[[2]][8]<-"Chilean Region of the Center"
attr(hetcor_mat$tests,"dimnames")[[2]][9]<-"Type of Center (Public)"
attr(hetcor_mat$tests,"dimnames")[[2]][10]<-"Sex"
attr(hetcor_mat$tests,"dimnames")[[2]][11]<-"Age at Admission"
attr(hetcor_mat$tests,"dimnames")[[2]][12]<-"Date of Admission"
attr(hetcor_mat$tests,"dimnames")[[2]][13]<-"Early Drop out"
attr(hetcor_mat$tests,"dimnames")[[2]][14]<-"Residential Treatment"
attr(hetcor_mat$tests,"dimnames")[[2]][15]<-"Drug Dependence"
attr(hetcor_mat$tests,"dimnames")[[2]][16]<-"Evaluation of the Therapeutic Process"

attr(hetcor_mat$tests,"dimnames")[[1]][1]<-"Starting Substance"
attr(hetcor_mat$tests,"dimnames")[[1]][2]<-"Marital Status"
attr(hetcor_mat$tests,"dimnames")[[1]][3]<-"Educational Attainment"
attr(hetcor_mat$tests,"dimnames")[[1]][4]<-"Age of Onset of Drug Use"
attr(hetcor_mat$tests,"dimnames")[[1]][5]<-"Frequency of use of primary drug"
attr(hetcor_mat$tests,"dimnames")[[1]][6]<-"Motive of Admission to Treatment"
attr(hetcor_mat$tests,"dimnames")[[1]][7]<-"Psychiatric comorbidity"
#attr(hetcor_mat$tests,"dimnames")[[1]][8]<-"Physical comorbidity"
attr(hetcor_mat$tests,"dimnames")[[1]][8]<-"Chilean Region of the Center"
attr(hetcor_mat$tests,"dimnames")[[1]][9]<-"Type of Center (Public)"
attr(hetcor_mat$tests,"dimnames")[[1]][10]<-"Sex"
attr(hetcor_mat$tests,"dimnames")[[1]][11]<-"Age at Admission"
attr(hetcor_mat$tests,"dimnames")[[1]][12]<-"Date of Admission"
attr(hetcor_mat$tests,"dimnames")[[1]][13]<-"Early Drop out"
attr(hetcor_mat$tests,"dimnames")[[1]][14]<-"Residential Treatment"
attr(hetcor_mat$tests,"dimnames")[[1]][15]<-"Drug Dependence"
attr(hetcor_mat$tests,"dimnames")[[1]][16]<-"Evaluation of the Therapeutic Process"

hetcor_mat$tests[is.na(hetcor_mat$tests)]<-1

ggcorrplot<-
ggcorrplot::ggcorrplot(hetcor_mat$correlations,
           ggtheme = ggplot2::theme_void,
           insig = "blank",
           pch=1,
           pch.cex=3,
           tl.srt = 45, 
           #pch="ns",
            p.mat = hetcor_mat$tests, #  replacement has 144 rows, data has 169
            #type = "lower",
           colors = c("#6D9EC1", "white", "#E46726"), 
           tl.cex=8,
           lab=F)+
  #scale_x_discrete(labels = var_lbls_p345, drop = F) +
  #scale_y_discrete(labels = var_lbls_p345, drop = F) +
  theme(axis.text.x = element_blank())+
  #theme(axis.text.y = element_text(size=7.5,color ="black", hjust = 1))+
  theme(axis.text.y = element_blank())+
  theme(legend.position="bottom")

ggplotly(ggcorrplot, height = 800, width=800)%>% 
  layout(xaxis= list(showticklabels = FALSE)) %>% 
 layout(annotations = 
 list(x = .1, y = -0.031, text = "", 
      showarrow = F, xref='paper', yref='paper', 
      #xanchor='center', yanchor='auto', xshift=0, yshift=-0,
      font=list(size=11, color="darkblue"))
 )


Imputation


We generated a plot to see all the missing values in the sample.


Figure 3. Bar plot of Porcentaje of Missing Values per Variables at Basline






From the figure above, we could see that the starting substance (sus_ini_mvv), the onset of drug use (edad_ini_cons) and the evaluation of the therapeutic process (evaluacindelprocesoteraputico) had around 6% of missing data. These values should be imputed. We first focused on the age of onset of drug use. It is important to consider that the evaluation of the therapeutic process could be distorted due to censoring (many users did not finish their treatment, and did not have this evaluation in the study period).



Age at Admission

We started looking over the missing values in the age at admission (n8). Since there were not cases with more than one treatment that had not an age of admission, we did not have to impute taking into account serial dependencies in the dates of treatment.

Figure 5. Density Estimation of Distributions of Age at Admission & Imputed Age at Admission

Figure 5. Density Estimation of Distributions of Age at Admission & Imputed Age at Admission


As seen in the Figure above, distributions seem to differ. However, considering the low amount of missing values in this variable, we proceeded with the imputation with the mean, despite the differences found. The imputed values must not be greater than the age of onset of drug use and may not be lower than 16 years old. Values lower than this age may be considered less likely to receive treatment for adult population, so it would be most probably incorrect that they would be in this database.


## [1] "Users that had more than one treatment with no date of admission:0"


Age of Onset of Drug Use

Another variable worth imputing is the Age of Onset of Drug Use (n= 6,549).


Figure 6. Density Estimation of Distributions of Age Of Onset of Drug Use & Imputed Ones

Figure 6. Density Estimation of Distributions of Age Of Onset of Drug Use & Imputed Ones


Based on the figure above, the age of onset of drug use was similar between the imputed values and the observed. However, we followed the rules stated in Duplicates process (link). There were three logical conditions to fulfill in order to replace adequately these values in the database: the age of onset must not be greater than the age of onset of drug use in the primary substance at admission (1), may not be greater than the age of admission to treatment (2), and the age of onset of drug use must be greater than 4 years old. Then, we selected the minimum value of age of onset of drug use among the imputed, because one user could not have more than one age of onset of drug use.


## [1] "Number of users that had more than one different age of onset of drug use before replacement: 0"

Figure 7. Bar plot of Percentage of Incorrect Imputed Values per Imputation Sample

## [1] "Cases with more than missing one age of onset: 515"
## [1] "Number of rows with values that did not fulfilled the conditions: 0"
## [1] "Number of rows with values that did not fulfilled the conditions after replacement with the minimum by users: 0"
## [1] "Number of users that had different age of onset of drug use after replacement: 0"



There were 0 cases of imputed ages of onset of drug use that did not fulfilled the conditions necessary to replace the missing values with the imputed ones.


Starting Substance

Then we selected the most vulnerable value among the candidates of imputations of the starting substance (First, Cocaine paste, Cocaine hydrochloride or snort cocaine, Marijuana, Alcohol, and Other).


# Ver distintos valores propuestos para sustancia de inciio
sus_ini_mod_mvv_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$sus_ini_mod_mvv,
       amelia_fit$imputations$imp2$sus_ini_mod_mvv,
       amelia_fit$imputations$imp3$sus_ini_mod_mvv,
       amelia_fit$imputations$imp4$sus_ini_mod_mvv,
       amelia_fit$imputations$imp5$sus_ini_mod_mvv,
       amelia_fit$imputations$imp6$sus_ini_mod_mvv,
       amelia_fit$imputations$imp7$sus_ini_mod_mvv,
       amelia_fit$imputations$imp8$sus_ini_mod_mvv,
       amelia_fit$imputations$imp9$sus_ini_mod_mvv,
       amelia_fit$imputations$imp10$sus_ini_mod_mvv,
       amelia_fit$imputations$imp11$sus_ini_mod_mvv,
       amelia_fit$imputations$imp12$sus_ini_mod_mvv,
       amelia_fit$imputations$imp13$sus_ini_mod_mvv,
       amelia_fit$imputations$imp14$sus_ini_mod_mvv,
       amelia_fit$imputations$imp15$sus_ini_mod_mvv,
       amelia_fit$imputations$imp16$sus_ini_mod_mvv,
       amelia_fit$imputations$imp17$sus_ini_mod_mvv,
       amelia_fit$imputations$imp18$sus_ini_mod_mvv,
       amelia_fit$imputations$imp19$sus_ini_mod_mvv,
       amelia_fit$imputations$imp20$sus_ini_mod_mvv,
       amelia_fit$imputations$imp21$sus_ini_mod_mvv,
       amelia_fit$imputations$imp22$sus_ini_mod_mvv,
       amelia_fit$imputations$imp23$sus_ini_mod_mvv,
       amelia_fit$imputations$imp24$sus_ini_mod_mvv,
       amelia_fit$imputations$imp25$sus_ini_mod_mvv,
       amelia_fit$imputations$imp26$sus_ini_mod_mvv,
       amelia_fit$imputations$imp27$sus_ini_mod_mvv,
       amelia_fit$imputations$imp28$sus_ini_mod_mvv,
       amelia_fit$imputations$imp29$sus_ini_mod_mvv,
       amelia_fit$imputations$imp30$sus_ini_mod_mvv
       ) 

sus_ini_mod_mvv_imputed<-
sus_ini_mod_mvv_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Marijuana",as.character(.))~1,TRUE~0), .names="mar_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Alcohol",as.character(.))~1,TRUE~0), .names="oh_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine paste",as.character(.))~1,TRUE~0), .names="pb_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine hydrochloride",as.character(.))~1,TRUE~0), .names="coc_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Other",as.character(.))~1,TRUE~0), .names="otr_{col}"))%>%
        dplyr::mutate(sus_ini_mod_mvv_mar = base::rowSums(dplyr::select(., starts_with("mar_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_oh = base::rowSums(dplyr::select(., starts_with("oh_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_pb = base::rowSums(dplyr::select(., starts_with("pb_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_coc = base::rowSums(dplyr::select(., starts_with("coc_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_otr = base::rowSums(dplyr::select(., starts_with("otr_")))) %>% 
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_mar>0~1,TRUE~0)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_oh>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_pb>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_coc>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_otr>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_to_imputation=dplyr::case_when(sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_mar>0~"Marijuana",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_oh>0~"Alcohol",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_otr>0~"Other",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_mar>0~"Marijuana",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_oh>0~"Alcohol",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_otr>0~"Other")) %>% 
  janitor::clean_names()

sus_ini_mod_mvv_imputed<-
dplyr::select(sus_ini_mod_mvv_imputed,amelia_fit_imputations_imp1_row,sus_ini_mod_mvv_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss2<-
CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
   dplyr::left_join(sus_ini_mod_mvv_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(sus_ini_mod_mvv=factor(dplyr::case_when(is.na(sus_ini_mod_mvv)~as.character(sus_ini_mod_mvv_to_imputation),
                                 TRUE~as.character(sus_ini_mod_mvv)))) %>% 
  dplyr::select(-sus_ini_mod_mvv_to_imputation) %>% 
  data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#


Frequency of Use of the Primary Drug at Admission

Another variable that is worth imputing is the Frequency of use of primary drug at admission (n= 568). In case of ties, we selected the imputed values with the value with the most frequent drug use.


# Ver distintos valores propuestos para sustancia de inciio
freq_cons_sus_prin_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$freq_cons_sus_prin,
       amelia_fit$imputations$imp2$freq_cons_sus_prin,
       amelia_fit$imputations$imp3$freq_cons_sus_prin,
       amelia_fit$imputations$imp4$freq_cons_sus_prin,
       amelia_fit$imputations$imp5$freq_cons_sus_prin,
       amelia_fit$imputations$imp6$freq_cons_sus_prin,
       amelia_fit$imputations$imp7$freq_cons_sus_prin,
       amelia_fit$imputations$imp8$freq_cons_sus_prin,
       amelia_fit$imputations$imp9$freq_cons_sus_prin,
       amelia_fit$imputations$imp10$freq_cons_sus_prin,
       amelia_fit$imputations$imp11$freq_cons_sus_prin,
       amelia_fit$imputations$imp12$freq_cons_sus_prin,
       amelia_fit$imputations$imp13$freq_cons_sus_prin,
       amelia_fit$imputations$imp14$freq_cons_sus_prin,
       amelia_fit$imputations$imp15$freq_cons_sus_prin,
       amelia_fit$imputations$imp16$freq_cons_sus_prin,
       amelia_fit$imputations$imp17$freq_cons_sus_prin,
       amelia_fit$imputations$imp18$freq_cons_sus_prin,
       amelia_fit$imputations$imp19$freq_cons_sus_prin,
       amelia_fit$imputations$imp20$freq_cons_sus_prin,
       amelia_fit$imputations$imp21$freq_cons_sus_prin,
       amelia_fit$imputations$imp22$freq_cons_sus_prin,
       amelia_fit$imputations$imp23$freq_cons_sus_prin,
       amelia_fit$imputations$imp24$freq_cons_sus_prin,
       amelia_fit$imputations$imp25$freq_cons_sus_prin,
       amelia_fit$imputations$imp26$freq_cons_sus_prin,
       amelia_fit$imputations$imp27$freq_cons_sus_prin,
       amelia_fit$imputations$imp28$freq_cons_sus_prin,
       amelia_fit$imputations$imp29$freq_cons_sus_prin,
       amelia_fit$imputations$imp30$freq_cons_sus_prin
       ) 

freq_cons_sus_prin_imputed<-
freq_cons_sus_prin_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("1 day a week or more",as.character(.))~1,TRUE~0), .names="1_day_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("2 to 3 days a week",as.character(.))~1,TRUE~0), .names="2_3_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("4 to 6 days a week",as.character(.))~1,TRUE~0), .names="4_6_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Less than 1 day a week",as.character(.))~1,TRUE~0), .names="less_1_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Did not use",as.character(.))~1,TRUE~0), .names="did_not_{col}"))%>%
    dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Daily",as.character(.))~1,TRUE~0), .names="daily_{col}"))%>%
  dplyr::mutate(freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_")))) %>% 
  dplyr::mutate(freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))) %>% 
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_1_day>0~1,TRUE~0)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_2_3>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_4_6>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_less_1>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_did_not>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_daily>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  #hierarchy
  dplyr::mutate(freq_cons_sus_prin_to_imputation=
                  dplyr::case_when(freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_daily>0~"Daily",
                                     freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_did_not>0~"Did not use",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_daily>0~"Daily",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_did_not>0~"Did not use")) %>% 
  janitor::clean_names()

freq_cons_sus_prin_imputed<-
dplyr::select(freq_cons_sus_prin_imputed,amelia_fit_imputations_imp1_row,freq_cons_sus_prin_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss3<-
CONS_C1_df_dup_SEP_2020_match_miss2 %>% 
   dplyr::left_join(freq_cons_sus_prin_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(freq_cons_sus_prin=factor(dplyr::case_when(is.na(freq_cons_sus_prin)~as.character(freq_cons_sus_prin_to_imputation), TRUE~as.character(freq_cons_sus_prin)))) %>% 
  data.table()


Educational Attainment

Another variable that is worth imputing is the Educational Attainment (n= 437). we followed the rules stated in Duplicates4 process (link). We were particularly cautious to impute attainments that would follow a progression from primary school to more than high school. For this purpose, we first looked over the actual values per user, filling intermediate gaps in educational attainment in users with intermediate null values (a), we overcame with the difficulty of the incorrect imputations, by logically selecting if there were any .


# Ver distintos valores propuestos para sustancia de inciio
escolaridad_rec_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
                  amelia_fit$imputations$imp1$hash_key,
                  amelia_fit$imputations$imp1$fech_ing_num,
                  amelia_fit$imputations$imp1$escolaridad_rec,
                  amelia_fit$imputations$imp2$escolaridad_rec,
                  amelia_fit$imputations$imp3$escolaridad_rec,
                  amelia_fit$imputations$imp4$escolaridad_rec,
                  amelia_fit$imputations$imp5$escolaridad_rec,
                  amelia_fit$imputations$imp6$escolaridad_rec,
                  amelia_fit$imputations$imp7$escolaridad_rec,
                  amelia_fit$imputations$imp8$escolaridad_rec,
                  amelia_fit$imputations$imp9$escolaridad_rec,
                  amelia_fit$imputations$imp10$escolaridad_rec,
                  amelia_fit$imputations$imp11$escolaridad_rec,
                  amelia_fit$imputations$imp12$escolaridad_rec,
                  amelia_fit$imputations$imp13$escolaridad_rec,
                  amelia_fit$imputations$imp14$escolaridad_rec,
                  amelia_fit$imputations$imp15$escolaridad_rec,
                  amelia_fit$imputations$imp16$escolaridad_rec,
                  amelia_fit$imputations$imp17$escolaridad_rec,
                  amelia_fit$imputations$imp18$escolaridad_rec,
                  amelia_fit$imputations$imp19$escolaridad_rec,
                  amelia_fit$imputations$imp20$escolaridad_rec,
                  amelia_fit$imputations$imp21$escolaridad_rec,
                  amelia_fit$imputations$imp22$escolaridad_rec,
                  amelia_fit$imputations$imp23$escolaridad_rec,
                  amelia_fit$imputations$imp24$escolaridad_rec,
                  amelia_fit$imputations$imp25$escolaridad_rec,
                  amelia_fit$imputations$imp26$escolaridad_rec,
                  amelia_fit$imputations$imp27$escolaridad_rec,
                  amelia_fit$imputations$imp28$escolaridad_rec,
                  amelia_fit$imputations$imp29$escolaridad_rec,
                  amelia_fit$imputations$imp30$escolaridad_rec) 

escolaridad_rec_imputed2<-
escolaridad_rec_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("3-Completed primary school or less",as.character(.))~1,TRUE~0), .names="3_primary_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("2-Completed high school or less",as.character(.))~1,TRUE~0), .names="2_high_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("1-More than high school",as.character(.))~1,TRUE~0), .names="1_more_high_{col}")) %>% 

  dplyr::mutate(escolaridad_rec_3_primary = base::rowSums(dplyr::select(., contains("3_primary_")))) %>% 
  dplyr::mutate(escolaridad_rec_2_high = base::rowSums(dplyr::select(., contains("2_high_"))))%>%
  dplyr::mutate(escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., contains("1_more_high_"))))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create an ordered index of the number of treatments by user
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#:#:#:#;#;#;
CONS_C1_df_dup_SEP_2020_match_rn<-
    CONS_C1_df_dup_SEP_2020_match_miss %>%  #base de datos original, sin imputaciones
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(rn=row_number()) %>% 
    dplyr::ungroup() %>% 
    dplyr::select(rn)
#:#:#:#;#;#;
escolaridad_rec_imputed3<-
escolaridad_rec_imputed2 %>%   
  dplyr::left_join(cbind.data.frame(CONS_C1_df_dup_SEP_2020_match_miss$row, CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec,CONS_C1_df_dup_SEP_2020_match_rn$rn),by=c("amelia_fit.imputations.imp1.row"="CONS_C1_df_dup_SEP_2020_match_miss$row")) %>%
  dplyr::rename("escolaridad_rec_original"="CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec") %>%
  dplyr::mutate(escolaridad_rec_original=as.numeric(substr(escolaridad_rec_original, 1, 1))) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  #ordenar por tratamientos por usuario
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::arrange(amelia_fit.imputations.imp1.hash_key,`CONS_C1_df_dup_SEP_2020_match_rn$rn`) %>% 
  dplyr::group_by(amelia_fit.imputations.imp1.hash_key) %>%  
  dplyr::mutate(siguiente_escolaridad_rec_original=lead(escolaridad_rec_original), 
                subsig_escolaridad_rec_original=lead(escolaridad_rec_original,n =2), 
                rn=max(`CONS_C1_df_dup_SEP_2020_match_rn$rn`),
                n_na_esc_or=is.na(escolaridad_rec_original),
                sum_n_na_esc_or=sum(n_na_esc_or,na.rm=T),
                max_sum_n_na_esc_or=max(n_na_esc_or,na.rm=T)
                ) %>% 
#dplyr::select(amelia_fit.imputations.imp1.hash_key,amelia_fit.imputations.imp30.rn,
#              siguiente_escolaridad_rec_original,escolaridad_rec_original,amelia_fit.imputations.imp1.fech_ing_num)%>% View()
  dplyr::ungroup()

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#PREPARACIÓN  BASE DE DATOS PARA IMPUTACION Y CREACIÓN DE VARIABLES PARA CONDICIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed4 <-
escolaridad_rec_imputed3 %>% 
  dplyr::select(amelia_fit.imputations.imp1.hash_key,`CONS_C1_df_dup_SEP_2020_match_rn$rn`,escolaridad_rec_original,escolaridad_rec_3_primary,escolaridad_rec_2_high, escolaridad_rec_1_more_high) %>%
  dplyr::rename("hash_key"="amelia_fit.imputations.imp1.hash_key") %>% 
  dplyr::rename("treat_no_for_usr"="CONS_C1_df_dup_SEP_2020_match_rn$rn") %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(treat_per_usr=max(treat_no_for_usr,na.rm=T)) %>% 
  dplyr::ungroup() %>% 
  tidyr::pivot_wider(names_from=treat_no_for_usr,
                     #names_glue = "ord_treat_esc_{.value}",
                     values_from=c(escolaridad_rec_original,escolaridad_rec_3_primary,escolaridad_rec_2_high,escolaridad_rec_1_more_high),values_fill = NA) %>% 
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
#Ver si existen inconsistencias en la escolaridad, pero no sólo inconsistencias inmediatas, sino con hasta 2 espacios entre tratamientos
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_tot_cond=dplyr::case_when(
    (escolaridad_rec_original_10>escolaridad_rec_original_9)|(escolaridad_rec_original_10>escolaridad_rec_original_8)|(escolaridad_rec_original_10>escolaridad_rec_original_7)|
      (escolaridad_rec_original_9>escolaridad_rec_original_8)|(escolaridad_rec_original_9>escolaridad_rec_original_7)|(escolaridad_rec_original_9>escolaridad_rec_original_6)|
      (escolaridad_rec_original_8>escolaridad_rec_original_7)|(escolaridad_rec_original_8>escolaridad_rec_original_6)|(escolaridad_rec_original_8>escolaridad_rec_original_5)|
      (escolaridad_rec_original_7>escolaridad_rec_original_6)|(escolaridad_rec_original_7>escolaridad_rec_original_5)|(escolaridad_rec_original_7>escolaridad_rec_original_4)|
      (escolaridad_rec_original_6>escolaridad_rec_original_5)|(escolaridad_rec_original_6>escolaridad_rec_original_4)|(escolaridad_rec_original_6>escolaridad_rec_original_3)|
      (escolaridad_rec_original_5>escolaridad_rec_original_4)|(escolaridad_rec_original_5>escolaridad_rec_original_3)|(escolaridad_rec_original_5>escolaridad_rec_original_2)|
      (escolaridad_rec_original_4>escolaridad_rec_original_3)|(escolaridad_rec_original_4>escolaridad_rec_original_2)|(escolaridad_rec_original_4>escolaridad_rec_original_1)|
      (escolaridad_rec_original_3>escolaridad_rec_original_2)|(escolaridad_rec_original_3>escolaridad_rec_original_1)|
      (escolaridad_rec_original_2>escolaridad_rec_original_1)~1,TRUE~0)) %>% 
  #dplyr::filter(escolaridad_rec_tot_cond==1) %>% #View() #0 rows ¿y 374745c85601976177fe614a7370e475?
  #dplyr::filter(treat_per_usr>1) %>% 
  #:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  # Ver si hay valores de escolaridad ausentes en una progresión de tratamientos
  #:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  dplyr::mutate(sum_nas_esc=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
  
  dplyr::mutate(escolaridad_rec_tot_nas_en_medio=dplyr::case_when(
      (sum_nas_esc>10 & treat_per_usr==10)|
      (sum_nas_esc>1 & treat_per_usr==9)|
      (sum_nas_esc>2 & treat_per_usr==8)|
      (sum_nas_esc>3 & treat_per_usr==7)|
      (sum_nas_esc>4 & treat_per_usr==6)|
      (sum_nas_esc>5 & treat_per_usr==5)|
      (sum_nas_esc>6 & treat_per_usr==4)|
      (sum_nas_esc>7 & treat_per_usr==3)|
      (sum_nas_esc>8 & treat_per_usr==2)|
      (sum_nas_esc>9 & treat_per_usr==1)~1,TRUE~0)) %>% #18b1f9646a2cd6bebd962637cff0a21a 5 casos
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  #Generar la escolaridad al final
  #:#:#:#:#:#:#:#:#
  dplyr::mutate(last_esc=dplyr::case_when(treat_per_usr==10~escolaridad_rec_original_10,
                                          treat_per_usr==9~escolaridad_rec_original_9,
                                          treat_per_usr==8~escolaridad_rec_original_8,
                                          treat_per_usr==7~escolaridad_rec_original_7,
                                          treat_per_usr==6~escolaridad_rec_original_6,
                                          treat_per_usr==5~escolaridad_rec_original_5,
                                          treat_per_usr==4~escolaridad_rec_original_4,
                                          treat_per_usr==3~escolaridad_rec_original_3,
                                          treat_per_usr==2~escolaridad_rec_original_2,
                                          treat_per_usr==1~escolaridad_rec_original_1)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#a0))si valor final vs. inicial son iguales, imputar todo lo que está en medio con el mismo valor
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_9=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>9 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_9)) %>% 
  dplyr::mutate(escolaridad_rec_original_8=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>8 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_8)) %>% 
  dplyr::mutate(escolaridad_rec_original_7=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>7 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_7)) %>% 
  dplyr::mutate(escolaridad_rec_original_6=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>6 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_6)) %>% 
  dplyr::mutate(escolaridad_rec_original_5=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>5 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_5)) %>% 
  dplyr::mutate(escolaridad_rec_original_4=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>4 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_4)) %>% 
  dplyr::mutate(escolaridad_rec_original_3=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>3 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_3)) %>% 
  dplyr::mutate(escolaridad_rec_original_2=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>2 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_2)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#a1))cambiar valores vacíos intermedios  /// fijarse en  & escolaridad_rec_tot_cond==1
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#reemplazar el número intermedio por cada tratamiento para cada usuario
  dplyr::mutate(escolaridad_rec_original_9=dplyr::case_when(escolaridad_rec_original_8==escolaridad_rec_original_10 & is.na(escolaridad_rec_original_9)&!is.na(escolaridad_rec_original_10)~escolaridad_rec_original_10,TRUE~escolaridad_rec_original_9)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_10)) %>% View()
  dplyr::mutate(escolaridad_rec_original_8=dplyr::case_when(escolaridad_rec_original_7==escolaridad_rec_original_9 & is.na(escolaridad_rec_original_8)&!is.na(escolaridad_rec_original_9)~escolaridad_rec_original_9,TRUE~escolaridad_rec_original_8)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_9)) %>% View()
  dplyr::mutate(escolaridad_rec_original_7=dplyr::case_when(escolaridad_rec_original_6==escolaridad_rec_original_8 & is.na(escolaridad_rec_original_7)&!is.na(escolaridad_rec_original_8)~escolaridad_rec_original_8 ,TRUE~escolaridad_rec_original_7)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_8)) %>% View()
  dplyr::mutate(escolaridad_rec_original_6=dplyr::case_when(escolaridad_rec_original_5==escolaridad_rec_original_7& is.na(escolaridad_rec_original_6)&!is.na(escolaridad_rec_original_7)~escolaridad_rec_original_7,TRUE~escolaridad_rec_original_6)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_7)) %>% View()
  dplyr::mutate(escolaridad_rec_original_5=dplyr::case_when(escolaridad_rec_original_4==escolaridad_rec_original_6  & is.na(escolaridad_rec_original_5)&!is.na(escolaridad_rec_original_6)~escolaridad_rec_original_6,TRUE~escolaridad_rec_original_5)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_6)) %>% View()
  dplyr::mutate(escolaridad_rec_original_4=dplyr::case_when(escolaridad_rec_original_3==escolaridad_rec_original_5  & is.na(escolaridad_rec_original_4)&!is.na(escolaridad_rec_original_5)~escolaridad_rec_original_5,TRUE~escolaridad_rec_original_4)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_5)) %>% View()
  dplyr::mutate(escolaridad_rec_original_3=dplyr::case_when(escolaridad_rec_original_2==escolaridad_rec_original_4  & is.na(escolaridad_rec_original_3)&!is.na(escolaridad_rec_original_4)~escolaridad_rec_original_4,TRUE~escolaridad_rec_original_3)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_4)) %>% View()
  dplyr::mutate(escolaridad_rec_original_2=dplyr::case_when(escolaridad_rec_original_1==escolaridad_rec_original_3  & is.na(escolaridad_rec_original_2)&!is.na(escolaridad_rec_original_3)~escolaridad_rec_original_3,TRUE~escolaridad_rec_original_2)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_3)) %>% View()
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
##a2))si tiene información en la segunda pero no en la primera, y no es un valor intermedio como secundaria completa (ya que en ese caso puede adoptar más de un valor: más o igual a ese valor), imputarlo
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(escolaridad_rec_original_2==3~3,
                                                            escolaridad_rec_original_2==1~1,
                                                            TRUE~escolaridad_rec_original_1)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
##a3))si hay más de 2 tratamientos por usuarios, y tiene información en la segunda pero no en la primera, y es un valor intermedio pero tiene un tercer tratamiento con el mismo valor, imputarlo
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
    dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(escolaridad_rec_original_2==2 & escolaridad_rec_original_3==2~3,TRUE~escolaridad_rec_original_1))  %>% 

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#medidas para capturar inconsistencias a lo largo de todos los tratamientos de cada usuario
#escolaridad_rec_imputed4 %>% #escolaridad_rec_tot_cond
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==1~1,TRUE~0), .names="1_more_high_{col}")) %>% 
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==2~1,TRUE~0), .names="2_high_{col}")) %>% 
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==3~1,TRUE~0), .names="3_primary_{col}")) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., starts_with("1_more_high_")))) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_2_high = base::rowSums(dplyr::select(., starts_with("2_high_")))) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_3_primary = base::rowSums(dplyr::select(., starts_with("3_primary_"))))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#IMPUTACIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed5<-
escolaridad_rec_imputed4 %>% 
  #hacer una suma de más NA's de los que debería tener según la cantidad de tratamientos que tiene la persona
  #:#:#:#:#:#:#:#:#:
  dplyr::mutate(sum_nas_esc_post=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
  dplyr::mutate(escolaridad_rec_tot_nas_en_medio_post=dplyr::case_when(
      (sum_nas_esc_post>10 & treat_per_usr==10)|
      (sum_nas_esc_post>1 & treat_per_usr==9)|
      (sum_nas_esc_post>2 & treat_per_usr==8)|
      (sum_nas_esc_post>3 & treat_per_usr==7)|
      (sum_nas_esc_post>4 & treat_per_usr==6)|
      (sum_nas_esc_post>5 & treat_per_usr==5)|
      (sum_nas_esc_post>6 & treat_per_usr==4)|
      (sum_nas_esc_post>7 & treat_per_usr==3)|
      (sum_nas_esc_post>8 & treat_per_usr==2)|
      (sum_nas_esc_post>9 & treat_per_usr==1)~1,TRUE~0)) %>%
  #dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
  #d864967fa0b1c5bb1d4eb5f6a7c8c2c1
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b0))valor inicial y sólo un tratamiento, se imputa por el valor imputado más frecuente de las 30 bases de datos
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)& 
      (escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)~3,
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)& 
      (escolaridad_rec_2_high_1>escolaridad_rec_1_more_high_1)~2,
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_1_more_high_1>escolaridad_rec_3_primary_1)& 
      (escolaridad_rec_1_more_high_1>escolaridad_rec_2_high_1)~1,
    TRUE~escolaridad_rec_original_1)) %>% 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b1))valor en el segundo tratamiento es intermedio, inicial se imputa, dependiendo si primaria es mayor que intermedio o no
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  ###
  #dplyr::filter(is.na(escolaridad_rec_original_1),!is.na(escolaridad_rec_original_2)) %>%
  #dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1,escolaridad_rec_1_more_high_1) %>% View()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#

  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)~3,
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1<escolaridad_rec_2_high_1)~2,TRUE~escolaridad_rec_original_1))%>%
    #dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
#610dd4dba4dbb62848691b6916828948
  #90d581cd11064c41b82f8e4d6ff7b70b
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacío, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
  dplyr::mutate(escolaridad_rec_original_10= dplyr::case_when(
  #
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==1~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==2 & 
      (escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10)~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==2 & 
      (escolaridad_rec_1_more_high_10<escolaridad_rec_2_high_10)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
      (escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10) & (escolaridad_rec_1_more_high_10>escolaridad_rec_3_primary_10)~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
        (escolaridad_rec_2_high_10 >escolaridad_rec_1_more_high_10) & (escolaridad_rec_2_high_10>escolaridad_rec_3_primary_10)~2,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
      (escolaridad_rec_3_primary_10 >escolaridad_rec_2_high_10) & (escolaridad_rec_3_primary_10>escolaridad_rec_1_more_high_10)~2,TRUE~escolaridad_rec_original_10)) %>% 
 # dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
  #
    dplyr::mutate(escolaridad_rec_original_9= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==1~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==2 & 
      (escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9)~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==2 & 
      (escolaridad_rec_1_more_high_9<escolaridad_rec_2_high_9)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
      (escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9) & (escolaridad_rec_1_more_high_9>escolaridad_rec_3_primary_9)~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
        (escolaridad_rec_2_high_9 >escolaridad_rec_1_more_high_9) & (escolaridad_rec_2_high_9>escolaridad_rec_3_primary_9)~2,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
      (escolaridad_rec_3_primary_9 >escolaridad_rec_2_high_9) & (escolaridad_rec_3_primary_9>escolaridad_rec_1_more_high_9)~2,TRUE~escolaridad_rec_original_9)) %>% 
  #
        dplyr::mutate(escolaridad_rec_original_8= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==1~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==2 & 
      (escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8)~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==2 & 
      (escolaridad_rec_1_more_high_8<escolaridad_rec_2_high_8)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
      (escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8) & (escolaridad_rec_1_more_high_8>escolaridad_rec_3_primary_8)~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
        (escolaridad_rec_2_high_8 >escolaridad_rec_1_more_high_8) & (escolaridad_rec_2_high_8>escolaridad_rec_3_primary_8)~2,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
      (escolaridad_rec_3_primary_8 >escolaridad_rec_2_high_8) & (escolaridad_rec_3_primary_8>escolaridad_rec_1_more_high_8)~2,TRUE~escolaridad_rec_original_8)) %>% 
  #
        dplyr::mutate(escolaridad_rec_original_7= dplyr::case_when(
          #si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==1~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==2 & 
      (escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7)~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==2 & 
      (escolaridad_rec_1_more_high_7<escolaridad_rec_2_high_7)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
      (escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7) & (escolaridad_rec_1_more_high_7>escolaridad_rec_3_primary_7)~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
        (escolaridad_rec_2_high_7 >escolaridad_rec_1_more_high_7) & (escolaridad_rec_2_high_7>escolaridad_rec_3_primary_7)~2,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
      (escolaridad_rec_3_primary_7 >escolaridad_rec_2_high_7) & (escolaridad_rec_3_primary_7>escolaridad_rec_1_more_high_7)~2,TRUE~escolaridad_rec_original_7)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_6= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==1~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==2 & 
      (escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6)~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==2 & 
      (escolaridad_rec_1_more_high_6<escolaridad_rec_2_high_6)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
      (escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6) & (escolaridad_rec_1_more_high_6>escolaridad_rec_3_primary_6)~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
        (escolaridad_rec_2_high_6 >escolaridad_rec_1_more_high_6) & (escolaridad_rec_2_high_6>escolaridad_rec_3_primary_6)~2,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
      (escolaridad_rec_3_primary_6 >escolaridad_rec_2_high_6) & (escolaridad_rec_3_primary_6>escolaridad_rec_1_more_high_6)~2,TRUE~escolaridad_rec_original_6)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_5= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==1~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==2 & 
      (escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5)~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==2 & 
      (escolaridad_rec_1_more_high_5<escolaridad_rec_2_high_5)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
      (escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5) & (escolaridad_rec_1_more_high_5>escolaridad_rec_3_primary_5)~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
        (escolaridad_rec_2_high_5 >escolaridad_rec_1_more_high_5) & (escolaridad_rec_2_high_5>escolaridad_rec_3_primary_5)~2,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
      (escolaridad_rec_3_primary_5 >escolaridad_rec_2_high_5) & (escolaridad_rec_3_primary_5>escolaridad_rec_1_more_high_5)~2,TRUE~escolaridad_rec_original_5)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_4= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==1~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4)~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_4<escolaridad_rec_2_high_4)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
      (escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4) & (escolaridad_rec_1_more_high_4>escolaridad_rec_3_primary_4)~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
        (escolaridad_rec_2_high_4 >escolaridad_rec_1_more_high_4) & (escolaridad_rec_2_high_4>escolaridad_rec_3_primary_4)~2,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
      (escolaridad_rec_3_primary_4 >escolaridad_rec_2_high_4) & (escolaridad_rec_3_primary_4>escolaridad_rec_1_more_high_4)~2,TRUE~escolaridad_rec_original_4)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_3= dplyr::case_when(
#si la educación en el tratamiento anterior es la máxima, imputar con el mismo valor
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==1~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuál es el valor más creible (conserva intermedio o logra universitario)    
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3)~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_3<escolaridad_rec_2_high_3)~2,
#si la educación en el tratamiento anterior es la más baja, ver cuál es el valor más creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
      (escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3) & (escolaridad_rec_1_more_high_3>escolaridad_rec_3_primary_3)~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
        (escolaridad_rec_2_high_3 >escolaridad_rec_1_more_high_3) & (escolaridad_rec_2_high_3>escolaridad_rec_3_primary_3)~2,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
      (escolaridad_rec_3_primary_3 >escolaridad_rec_2_high_3) & (escolaridad_rec_3_primary_3>escolaridad_rec_1_more_high_3)~2,TRUE~escolaridad_rec_original_3))
#:#:#:#:
 # dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
 #:#:#:#:
  #comprobar si en verdad calza:
  #%>%dplyr::filter(hash_key=="ef4325cda7ddd92f6218bb910c3e0895") %>% dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,treat_per_usr,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1)
  #610dd4dba4dbb62848691b6916828948
  #90d581cd11064c41b82f8e4d6ff7b70b
#escolaridad_rec_imputed5 %>% 
#    dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_3,escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_4)

#98d6644d995ea2c8777a683160728004
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacío, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
escolaridad_rec_imputed6<-
escolaridad_rec_imputed5 %>% 
#dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_3)
  dplyr::select(hash_key,starts_with("escolaridad_rec_original_")) %>%
  tidyr::pivot_longer(cols = starts_with("escolaridad_rec_original_"),
   names_to = "rn",
   names_prefix = "escolaridad_rec_original_") %>% 
  dplyr::filter(!is.na(value)) %>% 
  dplyr::mutate(hash_rn=paste0(hash_key,"_",rn)) %>% 
  dplyr::select(hash_rn,value)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss4<-
CONS_C1_df_dup_SEP_2020_match_miss3 %>%
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(rn=row_number()) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(hash_rn=paste0(hash_key,"_",rn)) %>% 
  dplyr::left_join(escolaridad_rec_imputed6, by=c("hash_rn")) %>% 
  dplyr::mutate(escolaridad_rec=dplyr::case_when(value==1~"1-More than high school",value==2~"2-Completed high school or less",value==3~"3-Completed primary school or less")) %>% 
  #
  dplyr::arrange(hash_key,rn) %>% 
  #dplyr::mutate(escolaridad_rec=dplyr::case_when(is.na(escolaridad_rec)~value,TRUE~as.character(escolaridad_rec))) %>% 
  dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered =F,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
  dplyr::select(-value,-hash_rn) %>% 
  data.table()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
paste("Check inconsistencies with posterior educational attainments (0= No inconsistencies):",CONS_C1_df_dup_SEP_2020_match_miss4 %>% 
  dplyr::arrange(hash_key,rn) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(escolaridad_rec_num=as.numeric(substr(escolaridad_rec, 1, 1)),
                sig_escolaridad_rec_num=lead(escolaridad_rec_num),
                ant_escolaridad_rec_num=lag(escolaridad_rec_num)) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(escolaridad_rec_num>ant_escolaridad_rec_num) %>% 
  dplyr::select(hash_key,rn,fech_ing_num, escolaridad_rec, escolaridad_rec_num, sig_escolaridad_rec_num,ant_escolaridad_rec_num) %>% 
  nrow())
## [1] "Check inconsistencies with posterior educational attainments (0= No inconsistencies): 0"


We ended having 241 missing values in educational attainment (users=238), because the imputed values did not fulfilled the requirements of a progression of the educational attainment (eg., a user could not respond to have completed secondary school, but then answer that he had completed primary school only), for example, due to ties in the imputed values or no imputed values.


Marital status

Additionally, we replaced missing values of the marital status (n=198). Since different marital status were not particularly more vulnerable between each other, we selected the most frequent imputed value among the different imputed databases.


# Ver distintos valores propuestos para estado conyugal
estado_conyugal_2_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$estado_conyugal_2,
       amelia_fit$imputations$imp2$estado_conyugal_2,
       amelia_fit$imputations$imp3$estado_conyugal_2,
       amelia_fit$imputations$imp4$estado_conyugal_2,
       amelia_fit$imputations$imp5$estado_conyugal_2,
       amelia_fit$imputations$imp6$estado_conyugal_2,
       amelia_fit$imputations$imp7$estado_conyugal_2,
       amelia_fit$imputations$imp8$estado_conyugal_2,
       amelia_fit$imputations$imp9$estado_conyugal_2,
       amelia_fit$imputations$imp10$estado_conyugal_2,
       amelia_fit$imputations$imp11$estado_conyugal_2,
       amelia_fit$imputations$imp12$estado_conyugal_2,
       amelia_fit$imputations$imp13$estado_conyugal_2,
       amelia_fit$imputations$imp14$estado_conyugal_2,
       amelia_fit$imputations$imp15$estado_conyugal_2,
       amelia_fit$imputations$imp16$estado_conyugal_2,
       amelia_fit$imputations$imp17$estado_conyugal_2,
       amelia_fit$imputations$imp18$estado_conyugal_2,
       amelia_fit$imputations$imp19$estado_conyugal_2,
       amelia_fit$imputations$imp20$estado_conyugal_2,
       amelia_fit$imputations$imp21$estado_conyugal_2,
       amelia_fit$imputations$imp22$estado_conyugal_2,
       amelia_fit$imputations$imp23$estado_conyugal_2,
       amelia_fit$imputations$imp24$estado_conyugal_2,
       amelia_fit$imputations$imp25$estado_conyugal_2,
       amelia_fit$imputations$imp26$estado_conyugal_2,
       amelia_fit$imputations$imp27$estado_conyugal_2,
       amelia_fit$imputations$imp28$estado_conyugal_2,
       amelia_fit$imputations$imp29$estado_conyugal_2,
       amelia_fit$imputations$imp30$estado_conyugal_2
       ) 

estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Married/Shared living arrangements",as.character(.))~1,TRUE~0), .names="married_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Separated/Divorced",as.character(.))~1,TRUE~0), .names="sep_div_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Single",as.character(.))~1,TRUE~0), .names="singl_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Widower",as.character(.))~1,TRUE~0), .names="widow_{col}"))%>%
 
  dplyr::mutate(estado_conyugal_2_married = base::rowSums(dplyr::select(., starts_with("married_"))))%>%
  dplyr::mutate(estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., starts_with("sep_div_"))))%>%
  dplyr::mutate(estado_conyugal_2_singl = base::rowSums(dplyr::select(., starts_with("singl_"))))%>%
  dplyr::mutate(estado_conyugal_2_wid = base::rowSums(dplyr::select(., starts_with("widow_"))))%>%
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_married>0~1,TRUE~0)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_sep_div>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_singl>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_wid>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  janitor::clean_names()
  
estado_conyugal_2_imputed_cat_est_cony<-  
    estado_conyugal_2_imputed %>%
        tidyr::pivot_longer(c(estado_conyugal_2_married, estado_conyugal_2_sep_div, estado_conyugal_2_singl, estado_conyugal_2_wid), names_to = "cat_est_conyugal", values_to = "count") %>%
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(estado_conyugal_2_imputed_max=max(count,na.rm=T)) %>% 
        dplyr::ungroup() %>% 
        dplyr::filter(estado_conyugal_2_imputed_max==count) %>% 
        dplyr::select(amelia_fit_imputations_imp1_row,cat_est_conyugal,count) %>% 
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(n_row=n()) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate(cat_est_conyugal=dplyr::case_when(n_row>1~NA_character_,
                                                        TRUE~cat_est_conyugal)) %>% 
        dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
  
estado_conyugal_2_imputed<-
  estado_conyugal_2_imputed %>% 
    dplyr::left_join(estado_conyugal_2_imputed_cat_est_cony, by="amelia_fit_imputations_imp1_row") %>%
    dplyr::mutate(cat_est_conyugal=dplyr::case_when(cat_est_conyugal=="estado_conyugal_2_married"~"Married/Shared living arrangements",cat_est_conyugal=="estado_conyugal_2_sep_div"~"Separated/Divorced",cat_est_conyugal=="estado_conyugal_2_singl"~"Single",cat_est_conyugal=="estado_conyugal_2_wid"~"Widower"
    ))%>% 
  janitor::clean_names()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss5<-
CONS_C1_df_dup_SEP_2020_match_miss4 %>% 
   dplyr::left_join(dplyr::select(estado_conyugal_2_imputed,amelia_fit_imputations_imp1_row,cat_est_conyugal), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(cat_est_conyugal),TRUE~as.character(estado_conyugal_2)))) %>% 
  data.table()

no_calzaron_estado_cony<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist()

#CONS_C1_df_dup_SEP_2020_match_miss5 %>% 
#dplyr::filter(hash_key %in% CONS_C1_df_dup_SEP_2020_match_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist())


We could not resolve Marital status in 14 cases due to ties in the most frequent values.


Region & Type of Center (Public)

We looked over possible imputations to region of the center (n=28) and type of the center (public or private) (n=28).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

#no hay información. debemos imputar
no_mostrar=0
if (no_mostrar==1){
tipo_centro_nombre_region_nas_nombre_region<-
CONS_C1_df_dup_SEP_2020 %>% 
    #dplyr::filter(row %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match[,"row"]))) %>% 
    dplyr::filter(is.na(nombre_region)) %>% 
    janitor::tabyl(tipo_centro, nombre_region) 
}

nombre_region_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$nombre_region,
       amelia_fit$imputations$imp2$nombre_region,
       amelia_fit$imputations$imp3$nombre_region,
       amelia_fit$imputations$imp4$nombre_region,
       amelia_fit$imputations$imp5$nombre_region,
       amelia_fit$imputations$imp6$nombre_region,
       amelia_fit$imputations$imp7$nombre_region,
       amelia_fit$imputations$imp8$nombre_region,
       amelia_fit$imputations$imp9$nombre_region,
       amelia_fit$imputations$imp10$nombre_region,
       amelia_fit$imputations$imp11$nombre_region,
       amelia_fit$imputations$imp12$nombre_region,
       amelia_fit$imputations$imp13$nombre_region,
       amelia_fit$imputations$imp14$nombre_region,
       amelia_fit$imputations$imp15$nombre_region,
       amelia_fit$imputations$imp16$nombre_region,
       amelia_fit$imputations$imp17$nombre_region,
       amelia_fit$imputations$imp18$nombre_region,
       amelia_fit$imputations$imp19$nombre_region,
       amelia_fit$imputations$imp20$nombre_region,
       amelia_fit$imputations$imp21$nombre_region,
       amelia_fit$imputations$imp22$nombre_region,
       amelia_fit$imputations$imp23$nombre_region,
       amelia_fit$imputations$imp24$nombre_region,
       amelia_fit$imputations$imp25$nombre_region,
       amelia_fit$imputations$imp26$nombre_region,
       amelia_fit$imputations$imp27$nombre_region,
       amelia_fit$imputations$imp28$nombre_region,
       amelia_fit$imputations$imp29$nombre_region,
       amelia_fit$imputations$imp30$nombre_region
       ) 
nombre_region_imputed<-
nombre_region_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Antofagasta",as.character(.))~1,TRUE~0), .names="reg_02_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Araucan",as.character(.))~1,TRUE~0), .names="reg_09_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Arica",as.character(.))~1,TRUE~0), .names="reg_15_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Atacama",as.character(.))~1,TRUE~0), .names="reg_03_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Ays",as.character(.))~1,TRUE~0), .names="reg_11_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Biob",as.character(.))~1,TRUE~0), .names="reg_08_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Coquimbo",as.character(.))~1,TRUE~0), .names="reg_04_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los Lagos",as.character(.))~1,TRUE~0), .names="reg_10_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los R",as.character(.))~1,TRUE~0), .names="reg_14_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Magallanes",as.character(.))~1,TRUE~0), .names="reg_12_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Maule",as.character(.))~1,TRUE~0), .names="reg_07_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Metropolitana",as.character(.))~1,TRUE~0), .names="reg_13_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("uble",as.character(.))~1,TRUE~0), .names="reg_16_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Higgins",as.character(.))~1,TRUE~0), .names="reg_06_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Tarapac",as.character(.))~1,TRUE~0), .names="reg_01_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Valpara",as.character(.))~1,TRUE~0), .names="reg_05_{col}"))%>%
  
 
  dplyr::mutate(nombre_region_02 = base::rowSums(dplyr::select(., starts_with("reg_02_"))))%>%
  dplyr::mutate(nombre_region_09 = base::rowSums(dplyr::select(., starts_with("reg_09_"))))%>%
  dplyr::mutate(nombre_region_15 = base::rowSums(dplyr::select(., starts_with("reg_15_"))))%>%
  dplyr::mutate(nombre_region_03 = base::rowSums(dplyr::select(., starts_with("reg_03_"))))%>%
  dplyr::mutate(nombre_region_11 = base::rowSums(dplyr::select(., starts_with("reg_11_"))))%>%
  dplyr::mutate(nombre_region_08 = base::rowSums(dplyr::select(., starts_with("reg_08_"))))%>%
  dplyr::mutate(nombre_region_04 = base::rowSums(dplyr::select(., starts_with("reg_04_"))))%>%
  dplyr::mutate(nombre_region_10 = base::rowSums(dplyr::select(., starts_with("reg_10_"))))%>%
  dplyr::mutate(nombre_region_14 = base::rowSums(dplyr::select(., starts_with("reg_14_"))))%>%
  dplyr::mutate(nombre_region_12 = base::rowSums(dplyr::select(., starts_with("reg_12_"))))%>%
  dplyr::mutate(nombre_region_07 = base::rowSums(dplyr::select(., starts_with("reg_07_"))))%>%
  dplyr::mutate(nombre_region_13 = base::rowSums(dplyr::select(., starts_with("reg_13_"))))%>%
  dplyr::mutate(nombre_region_16 = base::rowSums(dplyr::select(., starts_with("reg_16_"))))%>%
  dplyr::mutate(nombre_region_06 = base::rowSums(dplyr::select(., starts_with("reg_06_"))))%>%
  dplyr::mutate(nombre_region_01 = base::rowSums(dplyr::select(., starts_with("reg_01_"))))%>%
  dplyr::mutate(nombre_region_05 = base::rowSums(dplyr::select(., starts_with("reg_05_"))))%>%
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_02>0~1,TRUE~0)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_09>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_15>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_03>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_11>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_08>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_04>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_10>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_14>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_12>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_07>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_13>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_16>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_06>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_01>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_05>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  janitor::clean_names()
  
nombre_region_imputed_cat_reg<-  
    nombre_region_imputed %>%
        tidyr::pivot_longer(c(nombre_region_01, nombre_region_02, nombre_region_03, nombre_region_04, nombre_region_05, nombre_region_06, nombre_region_07, nombre_region_08, nombre_region_09, nombre_region_10, nombre_region_11, nombre_region_12, nombre_region_13, nombre_region_14, nombre_region_15), names_to = "cat_nombre_region", values_to = "count") %>%
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(nombre_region_imputed_max=max(count,na.rm=T)) %>% 
        dplyr::ungroup() %>% 
        dplyr::filter(nombre_region_imputed_max==count) %>% 
        dplyr::select(amelia_fit_imputations_imp1_row,cat_nombre_region,count) %>% 
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(n_row=n()) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate(cat_nombre_region=dplyr::case_when(n_row>1~NA_character_,
                                                        TRUE~cat_nombre_region)) %>% 
        dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
  
nombre_region_imputed<-
  nombre_region_imputed %>% 
    dplyr::left_join(nombre_region_imputed_cat_reg, by="amelia_fit_imputations_imp1_row") %>%
    dplyr::mutate(cat_nombre_region=dplyr::case_when(cat_nombre_region=="nombre_region_01"~"Tarapacá (01)",cat_nombre_region=="nombre_region_02"~"Antofagasta (02)",cat_nombre_region=="nombre_region_03"~"Atacama (03)",cat_nombre_region=="nombre_region_04"~"Coquimbo (04)",cat_nombre_region=="nombre_region_05"~"Valparaíso (05)",cat_nombre_region=="nombre_region_06"~"O'Higgins (06)",cat_nombre_region=="nombre_region_07"~"Maule (07)",cat_nombre_region=="nombre_region_08"~"Biobío (08)",cat_nombre_region=="nombre_region_09"~"Araucanía (09)",cat_nombre_region=="nombre_region_10"~"Los Lagos (10)",cat_nombre_region=="nombre_region_11"~"Aysén (11)",cat_nombre_region=="nombre_region_12"~"Magallanes (12)",cat_nombre_region=="nombre_region_13"~"Metropolitana (13)",
                                                 cat_nombre_region=="nombre_region_14"~"Los Ríos (14)",cat_nombre_region=="nombre_region_15"~"Arica (15)",cat_nombre_region=="nombre_region_16"~"Ñuble (16)",
    ))%>% 
  janitor::clean_names()

#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_
tipo_centro_pub_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$tipo_centro_pub,
       amelia_fit$imputations$imp2$tipo_centro_pub,
       amelia_fit$imputations$imp3$tipo_centro_pub,
       amelia_fit$imputations$imp4$tipo_centro_pub,
       amelia_fit$imputations$imp5$tipo_centro_pub,
       amelia_fit$imputations$imp6$tipo_centro_pub,
       amelia_fit$imputations$imp7$tipo_centro_pub,
       amelia_fit$imputations$imp8$tipo_centro_pub,
       amelia_fit$imputations$imp9$tipo_centro_pub,
       amelia_fit$imputations$imp10$tipo_centro_pub,
       amelia_fit$imputations$imp11$tipo_centro_pub,
       amelia_fit$imputations$imp12$tipo_centro_pub,
       amelia_fit$imputations$imp13$tipo_centro_pub,
       amelia_fit$imputations$imp14$tipo_centro_pub,
       amelia_fit$imputations$imp15$tipo_centro_pub,
       amelia_fit$imputations$imp16$tipo_centro_pub,
       amelia_fit$imputations$imp17$tipo_centro_pub,
       amelia_fit$imputations$imp18$tipo_centro_pub,
       amelia_fit$imputations$imp19$tipo_centro_pub,
       amelia_fit$imputations$imp20$tipo_centro_pub,
       amelia_fit$imputations$imp21$tipo_centro_pub,
       amelia_fit$imputations$imp22$tipo_centro_pub,
       amelia_fit$imputations$imp23$tipo_centro_pub,
       amelia_fit$imputations$imp24$tipo_centro_pub,
       amelia_fit$imputations$imp25$tipo_centro_pub,
       amelia_fit$imputations$imp26$tipo_centro_pub,
       amelia_fit$imputations$imp27$tipo_centro_pub,
       amelia_fit$imputations$imp28$tipo_centro_pub,
       amelia_fit$imputations$imp29$tipo_centro_pub,
       amelia_fit$imputations$imp30$tipo_centro_pub
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::filter(value==TRUE) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(tipo_centro_pub_to_imputation=ifelse(n()>15,1,0))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss6<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>% 
   dplyr::left_join(dplyr::select(nombre_region_imputed,amelia_fit_imputations_imp1_row,cat_nombre_region), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(nombre_region=factor(dplyr::case_when(is.na(nombre_region)~as.character(cat_nombre_region),TRUE~as.character(nombre_region)))) %>% 
  dplyr::left_join(dplyr::select(tipo_centro_pub_imputed,amelia_fit_imputations_imp1_row,tipo_centro_pub_to_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(tipo_centro_pub=factor(dplyr::case_when(is.na(tipo_centro_pub)~as.logical(tipo_centro_pub_to_imputation),TRUE~as.logical(tipo_centro_pub)))) %>%
  dplyr::select(-c(cat_est_conyugal,cat_nombre_region,tipo_centro_pub_to_imputation,tipo_centro_pub_to_imputation)) %>% 
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))


There were impossible to impute region of the center in 6 cases due to ties in the different imputed values. In case of public or private center, there were no missing values once imputed.


Diagnose of Drug Consumption

We looked over possible imputations to the diagnosis of drug consumption (n=1).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

dg_trs_cons_sus_or_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp2$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp3$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp4$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp5$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp6$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp7$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp8$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp9$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp10$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp11$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp12$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp13$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp14$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp15$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp16$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp17$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp18$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp19$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp20$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp21$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp22$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp23$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp24$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp25$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp26$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp27$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp28$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp29$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp30$dg_trs_cons_sus_or
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::filter(value==TRUE) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(dg_trs_cons_imputation=ifelse(n()>15,1,0))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss7<-
CONS_C1_df_dup_SEP_2020_match_miss6 %>% 
    dplyr::left_join(dplyr::select(dg_trs_cons_sus_or_imputed,amelia_fit_imputations_imp1_row,dg_trs_cons_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(dg_trs_cons_sus_or=factor(dplyr::case_when(is.na(dg_trs_cons_sus_or)~as.logical(dg_trs_cons_imputation),TRUE~as.logical(dg_trs_cons_sus_or)))) %>%
  dplyr::select(-dg_trs_cons_imputation) %>% 
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))


Cause of Discharge

We looked over possible imputations to the truly missing values, discarding missing values due to censorship (n=20).

motivo_de_egreso_a_imputar<-
CONS_C1_df_dup_SEP_2020_match_miss %>% dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_imp)) %>% dplyr::filter(!is.na(fech_egres_imp))%>%dplyr::select(row)

motivodeegreso_mod_imp_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp2$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp3$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp4$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp5$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp6$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp7$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp8$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp9$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp10$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp11$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp12$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp13$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp14$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp15$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp16$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp17$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp18$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp19$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp20$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp21$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp22$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp23$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp24$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp25$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp26$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp27$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp28$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp29$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp30$motivodeegreso_mod_imp
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(amelia_fit_imputations_imp1_row %in% unlist(motivo_de_egreso_a_imputar$row)) %>% 
  #FILTRAR CASOS QUE SON ILÓGICOS: MUERTES CON TRATAMIENTOS POSTERIORES (1)
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,dup, duplicates_filtered,evaluacindelprocesoteraputico,fech_ing_next_treat),by=c("amelia_fit_imputations_imp1_row"="row")) %>% 
  dplyr::mutate(value_death=dplyr::case_when(value=="Death"& !is.na(fech_ing_next_treat)~1,TRUE~0)) %>% 
  dplyr::filter(value_death!=1) %>%  
  #:#:#:#:#:
  dplyr::count(amelia_fit_imputations_imp1_row,value) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::slice_min(n, n = 1) %>% 
  dplyr::summarise(adm_dis=sum(value == "Administrative discharge",na.rm=T),
                    death=sum(value == "Death",na.rm=T),
                    referral=sum(value == "Referral to another treatment",na.rm=T),
                    ter_dis=sum(value == "Therapeutic discharge",na.rm=T),
                    dropout=sum(value =="Drop-out",na.rm=T)) %>% 
  rowwise() %>% 
  dplyr::mutate(ties=sum(c_across(adm_dis:dropout)),ties=ifelse(ties>1,1,0)) %>% 
  #dplyr::filter(ties==1) %>% 
  dplyr::ungroup() %>% 
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,fech_egres_num,dup, duplicates_filtered,evaluacindelprocesoteraputico,tipo_centro_derivacion),by=c("amelia_fit_imputations_imp1_row"="row")) %>% 
  dplyr::mutate(motivodeegreso_mod_imp_imputation= dplyr::case_when(
    ties==0 & adm_dis==1 & fech_egres_imp<"2019-11-13"~"Administrative discharge",
    #its an absorving state. should not have posterior treatments
    ties==0 & death==1 & fech_egres_imp<"2019-11-13" & dup==duplicates_filtered~"Death",
    ties==0 & referral==1 & fech_egres_imp<"2019-11-13"~"Referral to another treatment",
    ties==0 & ter_dis==1 & fech_egres_imp<"2019-11-13"~"Therapeutic discharge",
    ties==0 & dropout==1 & fech_egres_imp<"2019-11-13"~"Drop-out",
    #si no hay fecha de egreso, está en la fecha de censura, sólo puede ser tratamiento en curso
    fech_egres_imp>="2019-11-13"~NA_character_,
    TRUE~NA_character_)) %>% 
    #si tiene evaluacindelprocesoteraputico, es porque no es un tratamiento en curso
  dplyr::rename("motivodeegreso_mod_imp_original"="motivodeegreso_mod_imp")

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
CONS_C1_df_dup_SEP_2020_match_miss8<-
CONS_C1_df_dup_SEP_2020_match_miss7 %>% 
   dplyr::left_join(motivodeegreso_mod_imp_imputed[,c("amelia_fit_imputations_imp1_row","motivodeegreso_mod_imp_original","fech_egres_imp","fech_egres_num","motivodeegreso_mod_imp_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
  #dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::select(row,hash_key,motivodeegreso_mod_imp_original, motivodeegreso_mod_imp_imputation,motivodeegreso_mod_imp,fech_egres_num,fech_egres_imp)
      dplyr::mutate(motivodeegreso_mod_imp=factor(dplyr::case_when(is.na(motivodeegreso_mod_imp)~motivodeegreso_mod_imp_imputation,
                                                                   motivodeegreso_mod_imp_original=="Ongoing treatment"~NA_character_, TRUE~as.character(motivodeegreso_mod_imp)))) %>% 
  dplyr::select(-motivodeegreso_mod_imp_imputation,-fech_egres_imp,-fech_egres_num,-motivodeegreso_mod_imp_original) %>% 
  #dplyr::rename_all( list(~paste0(., ".left"))) %>% 
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp) %>% 
                     dplyr::rename("motivodeegreso_mod_imp_original"="motivodeegreso_mod_imp"),by="row") %>%
  data.table()

# CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp,motivodeegreso_mod_imp_original)
#CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp_original)

#
if(
CONS_C1_df_dup_SEP_2020_match_miss8 %>% dplyr::filter(motivodeegreso_mod_imp_original!="Ongoing treatment",is.na(motivodeegreso_mod_imp)) %>% nrow()>0){"There are missing values on the cause of discharge"}


A total of 3 cases were not imputed due to ties in the imputed values.


Evaluation of the Therapeutic Process

Another variable that is worth imputing is the Evaluation of the Therapeutic Process at Discharge (n= 7,378). In case of ties, we selected the imputed values with the value with the minimum evaluation. Must consider that most of the null values could be explained by censoring or not completion of the treatment at the period of the study (n= 7,361).


# Ver distintos valores propuestos para sustancia de inciio
evaluacindelprocesoteraputico_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp2$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp3$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp4$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp5$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp6$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp7$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp8$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp9$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp10$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp11$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp12$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp13$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp14$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp15$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp16$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp17$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp18$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp19$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp20$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp21$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp22$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp23$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp24$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp25$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp26$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp27$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp28$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp29$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp30$evaluacindelprocesoteraputico
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(high_ach_1=sum(value == "1-High Achievement",na.rm=T),
                   med_ach_2=sum(value == "2-Medium Achievement",na.rm=T),
                  min_ach_3=sum(value =="3-Minimum Achievement",na.rm=T)) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(evaluacindelprocesoteraputico_imputation= dplyr::case_when(
      (high_ach_1 >med_ach_2) & (med_ach_2 >min_ach_3)~"1-High Achievement",
      (med_ach_2>high_ach_1) & (med_ach_2 >min_ach_3)~"2-Medium Achievement",
      (min_ach_3>med_ach_2) & (min_ach_3 >high_ach_1)~"3-Minimum Achievement"))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)

CONS_C1_df_dup_SEP_2020_match_miss9<-
CONS_C1_df_dup_SEP_2020_match_miss8 %>% 
   dplyr::left_join(evaluacindelprocesoteraputico_imputed[,c("amelia_fit_imputations_imp1_row","evaluacindelprocesoteraputico_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
    dplyr::mutate(evaluacindelprocesoteraputico=factor(dplyr::case_when(is.na(evaluacindelprocesoteraputico) & motivodeegreso_mod_imp %in% c("Drop-out","Administrative discharge","Therapeutic discharge","Referral to another treatment")~evaluacindelprocesoteraputico_imputation,
                                                                        is.na(motivodeegreso_mod_imp)~NA_character_,
                                                                        TRUE~as.character(evaluacindelprocesoteraputico)))) %>% 
     dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
  dplyr::select(-evaluacindelprocesoteraputico_imputation) %>% 
  data.table()

CONS_C1_df_dup_SEP_2020_match_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico) %>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress"),
               col.names = c("Cause of Discharge","1-High Achievement", "2- Medium Achievement","3- Minimum Achievement","Null Values"),
               align =rep('c', 101)) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
  kableExtra::scroll_box(width = "100%", height = "375px") 
Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress
Cause of Discharge 1-High Achievement 2- Medium Achievement 3- Minimum Achievement Null Values
Administrative discharge 867 4,427 4,488 0
Death 0 0 1 0
Drop-out 1,767 16,839 37,301 0
Referral to another treatment 1,298 5,835 4,705 0
Therapeutic discharge 17,120 6,135 1,118 1
NA 0 0 0 7,854


As seen in the table above, ongoing treatments did not have an evaluation process, which is logically valid, since their treatment competition was not captured.


Treatment Setting (Residential)

We looked over possible imputations to the treatment setting (n=97).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

tipo_de_plan_res_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$tipo_de_plan_res,
       amelia_fit$imputations$imp2$tipo_de_plan_res,
       amelia_fit$imputations$imp3$tipo_de_plan_res,
       amelia_fit$imputations$imp4$tipo_de_plan_res,
       amelia_fit$imputations$imp5$tipo_de_plan_res,
       amelia_fit$imputations$imp6$tipo_de_plan_res,
       amelia_fit$imputations$imp7$tipo_de_plan_res,
       amelia_fit$imputations$imp8$tipo_de_plan_res,
       amelia_fit$imputations$imp9$tipo_de_plan_res,
       amelia_fit$imputations$imp10$tipo_de_plan_res,
       amelia_fit$imputations$imp11$tipo_de_plan_res,
       amelia_fit$imputations$imp12$tipo_de_plan_res,
       amelia_fit$imputations$imp13$tipo_de_plan_res,
       amelia_fit$imputations$imp14$tipo_de_plan_res,
       amelia_fit$imputations$imp15$tipo_de_plan_res,
       amelia_fit$imputations$imp16$tipo_de_plan_res,
       amelia_fit$imputations$imp17$tipo_de_plan_res,
       amelia_fit$imputations$imp18$tipo_de_plan_res,
       amelia_fit$imputations$imp19$tipo_de_plan_res,
       amelia_fit$imputations$imp20$tipo_de_plan_res,
       amelia_fit$imputations$imp21$tipo_de_plan_res,
       amelia_fit$imputations$imp22$tipo_de_plan_res,
       amelia_fit$imputations$imp23$tipo_de_plan_res,
       amelia_fit$imputations$imp24$tipo_de_plan_res,
       amelia_fit$imputations$imp25$tipo_de_plan_res,
       amelia_fit$imputations$imp26$tipo_de_plan_res,
       amelia_fit$imputations$imp27$tipo_de_plan_res,
       amelia_fit$imputations$imp28$tipo_de_plan_res,
       amelia_fit$imputations$imp29$tipo_de_plan_res,
       amelia_fit$imputations$imp30$tipo_de_plan_res
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(n_res=sum(value=="1",na.rm=T),n_amb=sum(value=="0",na.rm=T))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss10<-
CONS_C1_df_dup_SEP_2020_match_miss9 %>% 
    dplyr::left_join(dplyr::select(tipo_de_plan_res_imputed,amelia_fit_imputations_imp1_row,n_res,n_amb), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(tipo_de_plan_res=factor(dplyr::case_when(is.na(tipo_de_plan_res)& (n_res>n_amb)~"1",is.na(tipo_de_plan_res)& (n_res<n_amb)~"0",TRUE~as.character(tipo_de_plan_res)))) %>%
  dplyr::select(-n_res,-n_amb) %>% 
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))

As a result of the process of imputation, some values were not possible to impute (n=97).


Sample Characteristics

We checked the characteristics of the sample depending on type of treatment (Residential or Outpatients).


#prop.table(table(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res),2)
match.on_tot <- c("row", "hash_key","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","abandono_temprano_rec","evaluacindelprocesoteraputico","motivodeegreso_mod_imp","dg_trs_cons_sus_or","tipo_de_plan_res","sexo_2","edad_al_ing","fech_ing_num")
#$109,756
#añado los imputados
CONS_C1_df_dup_SEP_2020_match_miss_after_imp<-
CONS_C1_df_dup_SEP_2020_match_miss %>% 
  dplyr::select(-sus_ini_mod_mvv,-estado_conyugal_2,-escolaridad_rec,-freq_cons_sus_prin,-nombre_region,-tipo_centro_pub,-evaluacindelprocesoteraputico,-motivodeegreso_mod_imp,-dg_trs_cons_sus_or,-tipo_de_plan_res,-edad_ini_cons,-via_adm_sus_prin_act) %>% #
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020_match_miss10,
                                 row,
                                 sus_ini_mod_mvv,
                                 estado_conyugal_2,
                                 escolaridad_rec,
                                 freq_cons_sus_prin,
                                 nombre_region,
                                 tipo_centro_pub,
                                 evaluacindelprocesoteraputico,
                                 motivodeegreso_mod_imp,
                                 dg_trs_cons_sus_or,
                                 tipo_de_plan_res,
                                 edad_ini_cons,rn),by="row") %>% 
  dplyr::arrange(tipo_de_plan_res,hash_key,rn) %>% 
  #elimino esta variable porque es accesoria
  dplyr::select(-edad_ini_sus_prin) %>% 
  #para transformar el motivo de egreso
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_num,dias_treat_imp_sin_na),by="row") %>%
  #dplyr::filter(fech_egres_num==18213,!is.na(motivodeegreso_mod_imp)) %>% 
  dplyr::mutate(motivodeegreso_mod_imp=dplyr::case_when(dias_treat_imp_sin_na>=90 & motivodeegreso_mod_imp=="Drop-out"~ "Late Drop-out",
                                                        dias_treat_imp_sin_na<90 & motivodeegreso_mod_imp=="Drop-out"~ "Early Drop-out",
                                                        fech_egres_num==18213 & is.na(motivodeegreso_mod_imp)~"Ongoing treatment",
                                                        TRUE~as.character(motivodeegreso_mod_imp)
                                                        )) %>% #janitor::tabyl(motivodeegreso_mod_imp)
  dplyr::mutate(evaluacindelprocesoteraputico2=dplyr::case_when(fech_egres_num==18213 & is.na(evaluacindelprocesoteraputico)~"Ongoing treatment",
                                                        TRUE~as.character(evaluacindelprocesoteraputico)
  )) %>% 
  dplyr::mutate(sum_miss = base::rowSums(is.na(dplyr::select(.,c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","freq_cons_sus_prin","nombre_region","tipo_centro_pub","evaluacindelprocesoteraputico2","motivodeegreso_mod_imp","dg_trs_cons_sus_or","tipo_de_plan_res","edad_ini_cons","sexo_2","edad_al_ing","fech_ing_num"))))) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(sum_miss=sum(sum_miss)) %>% 
  dplyr::ungroup() 

CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp %>% 
  dplyr::filter(sum_miss>0)

CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp %>% 
  dplyr::filter(sum_miss==0) %>% 
  dplyr::select(-sum_miss) %>% 
  dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","condicion_ocupacional_corr")], by="row") %>% 
  dplyr::select(-evaluacindelprocesoteraputico2)

#  CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados[complete.cases(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados[,..match.on_tot]),..match.on_tot] 


Considering that some missing values were not able to imputation (due to ties in the candidate values for imputation or inconsistent values for imputations) (337, users=272), we ended the process having 109,419 complete cases (users=84,776).


kableone <- function(x, ...) {
  capture.output(x <- print(x,...))
  knitr::kable(x,format= "html", format.args= list(decimal.mark= ".", big.mark= ","))
}
match.on.sel<-c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","dg_trs_cons_sus_or", "tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","condicion_ocupacional_corr")
catVars<-
c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","tipo_centro_pub","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","dg_trs_cons_sus_or","nombre_region","tipo_de_plan_res","sexo_2","condicion_ocupacional_corr")
#length(unique(CONS_C1_df_dup_SEP_2020_match$fech_ing_num))
#:#:#:#:#: DISMINUIR LA HETEROGENEIDAD DE LA FECHA DE INGRESO
# FORMAS DE CONSTREÑIR LA VARIABLE:
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-cut(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,100)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-CONS_C1_df_dup_SEP_2020_match_fech_ing_num
#CONS_C1_df_dup_SEP_2020_match_fech_ing_num<-CONS_C1_df_dup_SEP_2020_match$fech_ing_num
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,0)))
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)))

#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#:#:#:#:#: 

paste0("Inconsistencies in dup vs. rn: ",CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados%>% 
         dplyr::filter(dup!=rn) %>% nrow())
## [1] "Inconsistencies in dup vs. rn: 0"
CONS_C1_df_dup_SEP_2020_match_not_miss2 <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(dup==1) %>% 
  dplyr::select(-rn,-dias_treat_imp_sin_na,-fech_egres_num)

attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$sus_ini_mod_mvv,"label")<-"Starting Substance"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$edad_ini_cons,"label")<-"Age of Onset of Drug Use"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$freq_cons_sus_prin,"label")<-"Frequency of use of primary drug"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$nombre_region,"label")<-"Region of the Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_cie_10_rec,"label")<-"Psychiatric Comorbidity"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_trs_cons_sus_or,"label")<-"Drug Dependence"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$abandono_temprano_rec,"label")<-"Early Discharge"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,"label")<-"Residential"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_centro_pub,"label")<-"Public Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$condicion_ocupacional_corr,"label")<-"Occupational Status"

pre_tab1<-Sys.time()
tab1<-
CreateTableOne(vars = match.on.sel, strata = "tipo_de_plan_res", 
                       data = CONS_C1_df_dup_SEP_2020_match_not_miss2, factorVars = catVars, smd=T)
post_tab1<-Sys.time()
diff_time_tab1=post_tab1-pre_tab1

kableone(tab1, 
         caption = paste0("Table 5. Covariate Balance in the Variables of Interest"),
         col.names= c("Variables","Ambulatory","Residential", "p-values","test","SMD"),
         nonnormal= c("edad_ini_cons","edad_al_ing","fech_ing_num"),#"\\hline",
                       smd=T, test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F) %>% 
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
  #()
  row_spec(1, bold = T, italic =T,color ="black",hline_after=T,extra_latex_after="\\arrayrulecolor{white}",font_size= 10) %>%
  #footnote(general = "Here is a general comments of the table. ",
  #        number = c("Footnote 1; ", "Footnote 2; "),
  #         alphabet = c("Footnote A; ", "Footnote B; "),
  #         symbol = c("Footnote Symbol 1; ", "Footnote Symbol 2")
  #         )%>%
  scroll_box(width = "100%", height = "400px") 
0 1 p test SMD
n 72083 12693
Starting Substance (%) <0.001 0.369
Alcohol 41410 (57.4) 5074 (40.0)
Cocaine hydrochloride 2925 (4.1) 513 (4.0)
Cocaine paste 7671 (10.6) 2235 (17.6)
Marijuana 18417 (25.5) 4554 (35.9)
Other 1660 (2.3) 317 (2.5)
Marital Status (%) <0.001 0.309
Married/Shared living arrangements 26166 (36.3) 2911 (22.9)
Separated/Divorced 7713 (10.7) 1318 (10.4)
Single 37340 (51.8) 8332 (65.6)
Widower 864 (1.2) 132 (1.0)
Educational Attainment (%) <0.001 0.124
3-Completed primary school or less 21857 (30.3) 4572 (36.0)
2-Completed high school or less 37209 (51.6) 6136 (48.3)
1-More than high school 13017 (18.1) 1985 (15.6)
Age of Onset of Drug Use (median [IQR]) 15.00 [14.00, 18.00] 15.00 [13.00, 17.00] <0.001 nonnorm 0.090
Frequency of use of primary drug (%) <0.001 0.767
1 day a week or more 5323 (7.4) 273 (2.2)
2 to 3 days a week 22322 (31.0) 1323 (10.4)
4 to 6 days a week 12223 (17.0) 1649 (13.0)
Daily 28265 (39.2) 9231 (72.7)
Did not use 1094 (1.5) 84 (0.7)
Less than 1 day a week 2856 (4.0) 133 (1.0)
Origen de Ingreso (Primera Entrada)/Motive of Admission to Treatment (First Entry) (%) <0.001 0.509
Spontaneous 33648 (46.7) 4270 (33.6)
Assisted Referral 4933 (6.8) 3004 (23.7)
Other 3753 (5.2) 738 (5.8)
Justice Sector 7134 (9.9) 813 (6.4)
Health Sector 22615 (31.4) 3868 (30.5)
Psychiatric Comorbidity (%) <0.001 0.317
Without psychiatric comorbidity 29015 (40.3) 3247 (25.6)
Diagnosis unknown (under study) 13270 (18.4) 2763 (21.8)
With psychiatric comorbidity 29798 (41.3) 6683 (52.7)
Region of the Center (%) <0.001 0.388
Antofagasta (02) 2291 (3.2) 697 (5.5)
Araucanía (09) 2221 (3.1) 162 (1.3)
Arica (15) 1315 (1.8) 728 (5.7)
Atacama (03) 1831 (2.5) 258 (2.0)
Aysén (11) 797 (1.1) 42 (0.3)
Biobío (08) 5091 (7.1) 703 (5.5)
Coquimbo (04) 2798 (3.9) 268 (2.1)
Los Lagos (10) 2646 (3.7) 375 (3.0)
Los Ríos (14) 1113 (1.5) 185 (1.5)
Magallanes (12) 929 (1.3) 31 (0.2)
Maule (07) 4208 (5.8) 638 (5.0)
Metropolitana (13) 35961 (49.9) 6256 (49.3)
Ñuble (16) 540 (0.7) 20 (0.2)
O’Higgins (06) 3638 (5.0) 567 (4.5)
Tarapacá (01) 1350 (1.9) 598 (4.7)
Valparaíso (05) 5354 (7.4) 1165 (9.2)
Drug Dependence = TRUE (%) 50002 (69.4) 11645 (91.7) <0.001 0.589
Public Center = TRUE (%) 57121 (79.2) 3614 (28.5) <0.001 1.183
Sexo Usuario/Sex of User = Women (%) 17394 (24.1) 3937 (31.0) <0.001 0.155
Edad a la Fecha de Ingreso a Tratamiento (numérico continuo) (Primera Entrada)/Age at Admission to Treatment (First Entry) (median [IQR]) 34.43 [27.55, 43.46] 32.63 [26.34, 40.85] <0.001 nonnorm 0.185
Fecha de Ingreso a Tratamiento (Numérico)(c)/Date of Admission to Treatment (Numeric)(c) (median [IQR]) 16580.00 [15730.00, 17359.00] 16153.00 [15342.00, 17023.00] <0.001 nonnorm 0.293
Occupational Status (%) <0.001 1.025
Employed 39517 (54.8) 1771 (14.0)
Inactive 7674 (10.6) 1195 (9.4)
Looking for a job for the first time 172 (0.2) 20 (0.2)
No activity 2664 (3.7) 1820 (14.3)
Not seeking for work 492 (0.7) 335 (2.6)
Unemployed 21564 (29.9) 7552 (59.5)
#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne

## Construct a table 
#standardized mean differences of greater than 0.1


We checked the similarity in the samples using other measures, such as the variance ratio of the samples and Kolmogorov-Smirnov(KS) statistics.


library(cobalt)

bal2<-bal.tab(CONS_C1_df_dup_SEP_2020_match_not_miss2[,match.on.sel], treat = CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))
#"mean.diffs", "variance.ratios","ks.statistics","ovl.coefficient"

options(knitr.kable.NA = '')

bal2$Balance[,2]<-round(bal2$Balance[,2],2)
bal2$Balance[,4]<-round(bal2$Balance[,4],2)
bal2$Balance[,6]<-round(bal2$Balance[,6],2)

var_names<- 
    list("origen_ingreso_mod_Spontaneous"="Motive Admission-Spontaneous",
         "origen_ingreso_mod_Assisted Referral"= "Motive Admission-Assisted Referral",
         "origen_ingreso_mod_Other"="Motive Admission-Other",
         "origen_ingreso_mod_Justice Sector"= "Motive Admission-Justice Sector",
         "origen_ingreso_mod_Health Sector"="Motive Admission-Health Sector",
         "dg_cie_10_rec_Without psychiatric comorbidity"="ICD-10-Wo/Psych Comorbidity",
         "dg_cie_10_rec_Diagnosis unknown (under study)"="ICD-10-Dg. Unknown/under study",
         "dg_cie_10_rec_With psychiatric comorbidity"="ICD-10-W/Psych Comorbidity",
         "sexo_2_Women"="Sex-Women",
         "edad_al_ing"="Age at Admission",
         "fech_ing_num"="Date of Admission",
         "duplicates_filtered"="Treatments (#)",
         "more_one_treat"=">1 treatment",
         "sus_ini_mod_mvv_Alcohol"= "Starting Substance-Alcohol",
         "sus_ini_mod_mvv_Cocaine hydrochloride"= "Starting Substance-Cocaine hydrochloride",
         "sus_ini_mod_mvv_Cocaine paste"="Starting Substance-Cocaine paste",
         "sus_ini_mod_mvv_Marijuana"="Starting Substance-Marijuana",
         "sus_ini_mod_mvv_Other"="Starting Substance-Other",
         "estado_conyugal_2_Married/Shared living arrangements"="Marital Status-Married/Shared liv. arr.",
         "condicion_ocupacional_corr_Employed"="Occ.Status-Employed",
         "condicion_ocupacional_corr_Inactive"="Occ.Status-Inactive",
         "condicion_ocupacional_corr_Looking for a job for the first time"="Occ.Status-Looking 1st job",
         "condicion_ocupacional_corr_No activity"="Occ.Status- No activity",
         "condicion_ocupacional_corr_Not seeking for work"="Occ.Status- Not seeking work",
         "condicion_ocupacional_corr_Unemployed"="Occ.Status- Unemployed",
         "estado_conyugal_2_Separated/Divorced"="Marital Status-Separated/Divorced",
         "estado_conyugal_2_Single"= "Marital Status-Single",
         "estado_conyugal_2_Widower"="Marital Status-Widower",
         "escolaridad_rec_3-Completed primary school or less"="Educational Attainment-PS or less",
         "escolaridad_rec_2-Completed high school or less"="Educational Attainment-HS or less",
         "escolaridad_rec_1-More than high school"="Educational Attainment-More than HS",
         "freq_cons_sus_prin_1 day a week or more"="Freq Drug Cons-1d/wk or more",
         "freq_cons_sus_prin_2 to 3 days a week"="Freq Drug Cons-2-3d/wk",
         "freq_cons_sus_prin_4 to 6 days a week"="Freq Drug Cons-4-6d/wk",
         "freq_cons_sus_prin_Daily"="Freq Drug Cons-Daily",
         "freq_cons_sus_prin_Did not use"="Freq Drug Cons-Did not use",
         "freq_cons_sus_prin_Less than 1 day a week"="Freq Drug Cons-Less 1d/wk",
         "nombre_region_Antofagasta (02)"="Region-Antofagasta(02)",
         "nombre_region_Araucanía (09)"="Region-Araucanía(09)",
         "nombre_region_Arica (15)"="Region-Arica(15)",
         "nombre_region_Atacama (03)"="Region-Atacama(03)",
         "nombre_region_Aysén (11)"="Region-Aysén(11)",
         "nombre_region_Biobío (08)"="Region- Biobío(08)",
         "nombre_region_Coquimbo (04)"="Region-Coquimbo(04)",
         "nombre_region_Los Lagos (10)"="Region-Los Lagos(10)",
         "nombre_region_Los Ríos (14)"="Region-Los Ríos(14)",
         "nombre_region_Magallanes (12)"="Region-Magallanes(12)",
         "nombre_region_Maule (07)"="Region-Maule(07)",
         "nombre_region_Metropolitana (13)"="Region-Metropolitana(13)",
         "nombre_region_Ñuble (16)"="Region-Ñuble(16)",
         "nombre_region_O'Higgins (06)"="Region-O'Higgins(06)",
         "nombre_region_Tarapacá (01)"="Region-Tarapacá(01)",
         "nombre_region_Valparaíso (05)"="Region-Valparaíso(05)",
         "tipo_centro_pub"="Public Center",
         "dg_trs_cons_sus_or"= "Drug Dependence",
         "edad_ini_cons"="Age of Onset of Drug Use",
         "rn"="Treatment")

var.names<-data.table(data.frame(unlist(var_names)),keep.rownames = T) %>% janitor::clean_names()

balance_prev<-
data.table::data.table(bal2$Balance[,1:6],keep.rownames = T) %>%
  dplyr::arrange(-abs(Diff.Un)) %>% 
  dplyr::left_join(var.names,by="rn") %>% 
  dplyr::select(unlist_var_names,everything()) %>% 
  dplyr::select(-rn) 

balance_prev %>% #data.table::data.table(keep.rownames = F)
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 4. Covariate Balance in the Variables of Interest"),
               col.names = c("Variables","Nature of Variables", "Unadjusted SMDs","Threshold","Unadjusted Variance Ratios","Threshold","Unadjusted KS"),
               align =rep('c', 101)) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10) %>%
  kableExtra::add_footnote( c(paste("Note. ")), 
                            notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 4. Covariate Balance in the Variables of Interest
Variables Nature of Variables Unadjusted SMDs Threshold Unadjusted Variance Ratios Threshold Unadjusted KS
Public Center Binary -1.18 Not Balanced, >0.1 0.51
Occ.Status-Employed Binary -0.95 Not Balanced, >0.1 0.41
Freq Drug Cons-Daily Binary 0.72 Not Balanced, >0.1 0.34
Occ.Status- Unemployed Binary 0.62 Not Balanced, >0.1 0.30
Drug Dependence Binary 0.59 Not Balanced, >0.1 0.22
Freq Drug Cons-2-3d/wk Binary -0.52 Not Balanced, >0.1 0.21
Motive Admission-Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17
Occ.Status- No activity Binary 0.38 Not Balanced, >0.1 0.11
Starting Substance-Alcohol Binary -0.36 Not Balanced, >0.1 0.17
ICD-10-Wo/Psych Comorbidity Binary -0.32 Not Balanced, >0.1 0.15
Marital Status-Married/Shared liv. arr. Binary -0.30 Not Balanced, >0.1 0.13
Date of Admission Contin. -0.29 Not Balanced, >0.1 1.00 Balanced, <2 0.14
Marital Status-Single Binary 0.28 Not Balanced, >0.1 0.14
Motive Admission-Spontaneous Binary -0.27 Not Balanced, >0.1 0.13
Freq Drug Cons-1d/wk or more Binary -0.25 Not Balanced, >0.1 0.05
Starting Substance-Marijuana Binary 0.23 Not Balanced, >0.1 0.10
ICD-10-W/Psych Comorbidity Binary 0.23 Not Balanced, >0.1 0.11
Region-Arica(15) Binary 0.21 Not Balanced, >0.1 0.04
Starting Substance-Cocaine paste Binary 0.20 Not Balanced, >0.1 0.07
Freq Drug Cons-Less 1d/wk Binary -0.19 Not Balanced, >0.1 0.03
Age at Admission Contin. -0.19 Not Balanced, >0.1 0.84 Balanced, <2 0.07
Region-Tarapacá(01) Binary 0.16 Not Balanced, >0.1 0.03
Sex-Women Binary 0.15 Not Balanced, >0.1 0.07
Occ.Status- Not seeking work Binary 0.15 Not Balanced, >0.1 0.02
Motive Admission-Justice Sector Binary -0.13 Not Balanced, >0.1 0.03
Educational Attainment-PS or less Binary 0.12 Not Balanced, >0.1 0.06
Region-Araucanía(09) Binary -0.12 Not Balanced, >0.1 0.02
Region-Magallanes(12) Binary -0.12 Not Balanced, >0.1 0.01
Freq Drug Cons-4-6d/wk Binary -0.11 Not Balanced, >0.1 0.04
Region-Antofagasta(02) Binary 0.11 Not Balanced, >0.1 0.02
Region-Coquimbo(04) Binary -0.10 Not Balanced, >0.1 0.02
Age of Onset of Drug Use Contin. -0.09 Balanced, <0.1 0.91 Balanced, <2 0.07
Region-Aysén(11) Binary -0.09 Balanced, <0.1 0.01
Region-Ñuble(16) Binary -0.09 Balanced, <0.1 0.01
Freq Drug Cons-Did not use Binary -0.08 Balanced, <0.1 0.01
ICD-10-Dg. Unknown/under study Binary 0.08 Balanced, <0.1 0.03
Educational Attainment-HS or less Binary -0.07 Balanced, <0.1 0.03
Educational Attainment-More than HS Binary -0.06 Balanced, <0.1 0.02
Region- Biobío(08) Binary -0.06 Balanced, <0.1 0.02
Region-Valparaíso(05) Binary 0.06 Balanced, <0.1 0.02
Region-Los Lagos(10) Binary -0.04 Balanced, <0.1 0.01
Region-Maule(07) Binary -0.04 Balanced, <0.1 0.01
Occ.Status-Inactive Binary -0.04 Balanced, <0.1 0.01
Motive Admission-Other Binary 0.03 Balanced, <0.1 0.01
Region-Atacama(03) Binary -0.03 Balanced, <0.1 0.01
Region-O’Higgins(06) Binary -0.03 Balanced, <0.1 0.01
Marital Status-Widower Binary -0.02 Balanced, <0.1 0.00
Motive Admission-Health Sector Binary -0.02 Balanced, <0.1 0.01
Occ.Status-Looking 1st job Binary -0.02 Balanced, <0.1 0.00
Starting Substance-Other Binary 0.01 Balanced, <0.1 0.00
Marital Status-Separated/Divorced Binary -0.01 Balanced, <0.1 0.00
Region-Los Ríos(14) Binary -0.01 Balanced, <0.1 0.00
Region-Metropolitana(13) Binary -0.01 Balanced, <0.1 0.01
Starting Substance-Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00
Note.


We generated a plot to focus on unbalanced data.


Figure 8. Covariates Balance on Different Values

Figure 8. Covariates Balance on Different Values

Specification

First, we had to discretize categorical variables into logical parameters, and for continuous covariates, we divide them into 20 equal parts.


catVars<-
c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","tipo_centro_pub","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","dg_trs_cons_sus_or","nombre_region","tipo_de_plan_res","sexo_2","condicion_ocupacional_corr")
columna_dummy <- function(df, columna) {
  df %>% 
  mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>% 
    mutate(valor = 1) %>% 
    spread(key = columna, value = valor, fill = 0)
}

quantiles = function(covar, n_q) {
    p_q = seq(0, 1, 1/n_q)
    val_q = quantile(covar, probs = p_q, na.rm = TRUE)
    covar_out = rep(NA, length(covar))
    for (i in 1:n_q) {
        if (i==1) {covar_out[covar<val_q[i+1]] = i}
        if (i>1 & i<n_q) {covar_out[covar>=val_q[i] & covar<val_q[i+1]] = i}
        if (i==n_q) {covar_out[covar>=val_q[i] & covar<=val_q[i+1]] = i}}
    covar_out
}

CONS_C1_df_dup_SEP_2020_match_not_miss3<-CONS_C1_df_dup_SEP_2020_match_not_miss2
for (i in c(1:length(catVars))){#catVars[-10] excluding treatment indicator
  cat<-as.character(catVars[i])#catVars[-10] excluding treatment indicator
  CONS_C1_df_dup_SEP_2020_match_not_miss3<-columna_dummy(CONS_C1_df_dup_SEP_2020_match_not_miss3,cat)
}
CONS_C1_df_dup_SEP_2020_match_not_miss3$tipo_de_plan_res_FALSE<-NULL
CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_ini_cons<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_ini_cons,20)
CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_al_ing<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_al_ing,20)
CONS_C1_df_dup_SEP_2020_match_not_miss3$fech_ing_num<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$fech_ing_num,20)
match.on.sel2<-names(CONS_C1_df_dup_SEP_2020_match_not_miss3)[-c(1,2,5)]
#"edad_ini_cons","edad_al_ing","fech_ing_num")

CONS_SEP_match = data.table::data.table(CONS_C1_df_dup_SEP_2020_match_not_miss2[order(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res, decreasing = TRUE), ])

CONS_SEP_match_dum = data.table::data.table(CONS_C1_df_dup_SEP_2020_match_not_miss3 %>% dplyr::arrange(factor(row, levels = CONS_SEP_match$row)))


Match

The matched variables were defined for the treatments at baseline (n=84,776).


library(designmatch)

#fine = list(covs = fine_covs)
#solver = list(name = name, t_max = t_max, approximate = 1, round_cplex = 0, trace_cplex = 0).
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
# 1. Gurobi installation

#For an exact solution, we strongly recommend running designmatch either with CPLEX or Gurobi.  Between these two solvers, the R interface of Gurobi is considerably easier to install.  Here we provide general instructions for manually installing Gurobi and its R interface in Mac and Windows machines.

#1. Create a free academic license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/creating_a_new_academic_li.html

#2. Install the software
#   2.1. In http://www.gurobi.com/index, go to Downloads > Gurobi Software
#   2.2. Choose your operating system and press download
#
#3. Retrieve and set up your Gurobi license
#   2.1. Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_and_setting_up_.html
#   2.2. Then follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_a_free_academic.html
#
#4. Test your license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/testing_your_license.html
#
#5. Install the R interface of Gurobi   
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/r_installing_the_r_package.html
#   * In Windows, in R run the command install.packages("PATH\\gurobi_7.X-Y.zip", repos=NULL) where path leads to the file gurobi_7.X-Y.zip (for example PATH=C:\\gurobi702\\win64\\R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#   * In MAC, in R run the command install.packages('PATH/gurobi_7.X-Y.tgz', repos=NULL) where path leads to the file gurobi_7.X-Y.tgz (for example PATH=/Library/gurobi702/mac64/R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#       
#6. Test the installation 
#   Load the library and run the examples therein
#   * A possible error that you may get is the following: "Error: package ‘slam’ required by ‘gurobi’ could not be found". If that case, install.packages('slam') and try again.
#   You should be all set!
CONS_SEP_match$tipo_de_plan_res<-ifelse(CONS_SEP_match$tipo_de_plan_res=="1",1,0)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
require(slam)
# Solver options
#default solver is glpk with approximate = 1
#For an exact solution, we strongly recommend using cplex or gurobi as they are much faster than the other solvers, but they do require a license (free for academics, but not for people outside universities)
t_max = 60*60*6
solver = "gurobi" #cplex, glpk, gurobi and symphony
solver = list(name = solver, 
  t_max = t_max, #t_max is a scalar with the maximum time limit for finding the matches.within this time limit, a partial, suboptimal solution is given
  approximate = 0,#. If approximate = 1 (the default), an approximate solution is found via a relaxation of the original integer program. #FEB2021: I dont want to violate some balancing constraints to some extent. Change to 0.
  round_cplex = 0, 
  trace = 1#turns the optimizer output on
  )

#Indicador de tratamiento
t_ind= ifelse(CONS_SEP_match$tipo_de_plan_res=="1",1,0)

#table(is.na(CONS_SEP_match$tipo_de_plan_res))

# Moment balance: constrain differences in means to be at most 0.1 standard deviations apart
#:#:#:#:#:#:#:#:#:#:#:#:#:
#######mom_covs is a matrix where each column is a covariate whose mean is to be balanced
#######mom_tols is a vector of tolerances for the maximum difference in means for the covariates in mom_covs
#######mom_targets is a vector of target moments (e.g., means) of a distribution to be approximated by matched sampling. is optional, but if #######mom_covs is specified then mom_tols needs to be specified too
#######The lengths of mom_tols and mom_target have to be equal to the number of columns of mom_covs
mom_covs = cbind(CONS_SEP_match$edad_al_ing,
                 CONS_SEP_match$fech_ing_num,
                 CONS_SEP_match$edad_ini_cons)
mom_tols = absstddif(mom_covs, t_ind, .15)# original, 0.05, ahora probaré con 0.7
mom = list(covs = mom_covs, tols = mom_tols, targets = NULL)

# Mean balance
covs = cbind(CONS_SEP_match$edad_al_ing,
                 CONS_SEP_match$fech_ing_num,
                 CONS_SEP_match$edad_ini_cons)
meantab(covs, t_ind)
##      Mis      Min      Max   Mean T   Mean C Std Dif P-val
## [1,]   0    14.88    88.84    35.99    35.99       0     1
## [2,]   0 13621.00 18199.00 16445.49 16445.49       0     1
## [3,]   0     5.00    74.00    16.51    16.51       0     1
# Fine balance
#is a matrix where each column is a nominal covariate for fine balance
fine_covs = cbind(CONS_SEP_match$origen_ingreso_mod,
                  CONS_SEP_match$dg_cie_10_rec,
                  CONS_SEP_match$sexo_2,
                  CONS_SEP_match$sus_ini_mod_mvv,
                  CONS_SEP_match$tipo_centro_pub, #cuidado
                  CONS_SEP_match$estado_conyugal_2, 
                  CONS_SEP_match$escolaridad_rec,
                  CONS_SEP_match$freq_cons_sus_prin,
                  CONS_SEP_match$nombre_region,
                  CONS_SEP_match$condicion_ocupacional_corr,
                  #d_match_no_duplicates$evaluacindelprocesoteraputico,
                  CONS_SEP_match$dg_trs_cons_sus_or
)
fine = list(covs = fine_covs)

# 11,448; No. of controls: 11,448"
# 11,452; No. of controls: 11,452"
# 11,459; No. of controls: 11,459" #when I changed tolerance from .0999 to .1999
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#MATCH
start.time <- Sys.time()
set.seed(2125)
out = cardmatch(t_ind, #ES NECESARIO QUE LOS TRATAMIENTOS ESTEN ORDENADOS Y LOS OTROS VECTORES TAMBIËN 
                mom = mom,# ya los definí list(covs = mom_covs, tols = mom_tols, targets = mom_targets), 
          fine = fine, 
          solver = solver)
##   Building the matching problem... 
##   Gurobi optimizer is open... 
##   Finding the optimal matches... 
## Gurobi Optimizer version 9.1.0 build v9.1.0rc0 (win64)
## Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
## Optimize a model with 60 rows, 84776 columns and 1441192 nonzeros
## Model fingerprint: 0xf9a678f3
## Variable types: 0 continuous, 84776 integer (84776 binary)
## Coefficient statistics:
##   Matrix range     [1e+00, 2e+04]
##   Objective range  [1e+00, 1e+00]
##   Bounds range     [0e+00, 0e+00]
##   RHS range        [0e+00, 0e+00]
## Found heuristic solution: objective -0.0000000
## Presolve time: 1.95s
## Presolved: 60 rows, 84776 columns, 1440986 nonzeros
## Variable types: 0 continuous, 84776 integer (84776 binary)
## 
## Root relaxation: objective 1.145804e+04, 389 iterations, 0.75 seconds
## 
##     Nodes    |    Current Node    |     Objective Bounds      |     Work
##  Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time
## 
##      0     0 11458.0374    0   31   -0.00000 11458.0374      -     -    2s
## H    0     0                    1772.0000000 11458.0374   547%     -    7s
##      0     0 11458.0374    0   31 1772.00000 11458.0374   547%     -    9s
## H    0     0                    11458.000000 11458.0374  0.00%     -    9s
##      0     0 11458.0374    0   31 11458.0000 11458.0374  0.00%     -    9s
## 
## Explored 1 nodes (389 simplex iterations) in 9.60 seconds
## Thread count was 12 (of 12 available processors)
## 
## Solution count 3: 11458 1772 -0 
## 
## Optimal solution found (tolerance 1.00e-04)
## Best objective 1.145800000000e+04, best bound 1.145800000000e+04, gap 0.0000%
##   Optimal matches found
#FEB2021= If I change to bmatch, error can't allocate vector size 3.4gb
end.time <- Sys.time()
time.taken <- end.time - start.time
# Fine balance (note here we are getting an approximate solution)
#for (i in 1:ncol(fine_covs)) {     
#   print(finetab(fine_covs[, i], t_id_1, c_id_1))
#}
# Indices of the treated units and matched controls
t_id_1 = out$t_id  
c_id_1 = out$c_id   
group = out$group_id    
ids_matched<-cbind.data.frame(t_id_1, c_id_1,group)

paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
## [1] "No. of treatments: 11,458; No. of controls: 11,458"
# Fine balance (note here we are getting an approximate solution)
finetab_match1<-data.frame()
for (i in 1:ncol(fine_covs)) {      
    #finetab_match1<- rbind.data.frame(
  finetab(fine_covs[, i], t_id_1, c_id_1)
}

d_match = CONS_SEP_match[c(t_id_1, c_id_1), ]

paste0("Number of duplicated rows: ",d_match %>%  dplyr::group_by(row) %>%  dplyr::mutate(n_row=n()) %>% dplyr::ungroup() %>% dplyr::filter(n_row>1) %>% nrow())
## [1] "Number of duplicated rows: 0"
paste0("Percentage of the selected treatments: ",scales::percent(length(t_id_1)/CONS_SEP_match %>% dplyr::filter(tipo_de_plan_res==1) %>% nrow()))
## [1] "Percentage of the selected treatments: 90%"
paste0("Percentage of the selected controls: ",
       scales::percent(length(c_id_1)/CONS_SEP_match %>% dplyr::filter(tipo_de_plan_res==0) %>% nrow()))
## [1] "Percentage of the selected controls: 16%"
#cuidado, el anterior me encontró más del mismo control para un tratado
#por eso ocuparé el de más abajo.
#EL DE A CONTINUACIÓN ES ERRÓNEO PORQUE ES POR POSICIÓN, NO POR COINCIDENCIA DEL NÚMERO CON LA FILA
#d_match_no_duplicates = CONS_SEP_match[which(CONS_SEP_match$row %in% c(t_id_1, c_id_1)), ]


Explore Results of the Matching


Age at Admission

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Age of Onset of Drug Use

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Date of Admission

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample

Figures 9-12. Empirical Cumulative Distribution Functions on the Matched Sample


Love plot

Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


Balance

Table 5. Covariate Balance in the Variables of Interest
Unadjusted
Adjusted
Variables Nature of Variables SMDs Threshold Variance Ratios Threshold KS SMDs Threshold Variance Ratios Threshold KS
Public Center Binary -1.18 Not Balanced, >0.1 0.51 0.00 Balanced, <0.1 0.00
Occ.Status-Employed Binary -0.95 Not Balanced, >0.1 0.41 0.00 Balanced, <0.1 0.00
Freq Drug Cons-Daily Binary 0.72 Not Balanced, >0.1 0.34 0.00 Balanced, <0.1 0.00
Occ.Status- Unemployed Binary 0.62 Not Balanced, >0.1 0.30 0.00 Balanced, <0.1 0.00
Drug Dependence Binary 0.59 Not Balanced, >0.1 0.22 0.00 Balanced, <0.1 0.00
Freq Drug Cons-2-3d/wk Binary -0.52 Not Balanced, >0.1 0.21 0.00 Balanced, <0.1 0.00
Motive Admission-Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17 0.00 Balanced, <0.1 0.00
Occ.Status- No activity Binary 0.38 Not Balanced, >0.1 0.11 0.00 Balanced, <0.1 0.00
Starting Substance-Alcohol Binary -0.36 Not Balanced, >0.1 0.17 0.00 Balanced, <0.1 0.00
>1 treatment Binary 0.33 Not Balanced, >0.1 0.14 0.23 Not Balanced, >0.1 0.10
ICD-10-Wo/Psych Comorbidity Binary -0.32 Not Balanced, >0.1 0.15 0.00 Balanced, <0.1 0.00
Treatments (#) Contin. 0.31 Not Balanced, >0.1 1.91 Balanced, <2 0.14 0.21 Not Balanced, >0.1 1.46 Balanced, <2 0.10
Marital Status-Married/Shared liv. arr. Binary -0.30 Not Balanced, >0.1 0.13 0.00 Balanced, <0.1 0.00
Date of Admission Contin. -0.29 Not Balanced, >0.1 1.00 Balanced, <2 0.14 -0.15 Not Balanced, >0.1 0.93 Balanced, <2 0.08
Marital Status-Single Binary 0.28 Not Balanced, >0.1 0.14 0.00 Balanced, <0.1 0.00
Motive Admission-Spontaneous Binary -0.27 Not Balanced, >0.1 0.13 0.00 Balanced, <0.1 0.00
Freq Drug Cons-1d/wk or more Binary -0.25 Not Balanced, >0.1 0.05 0.00 Balanced, <0.1 0.00
ICD-10-W/Psych Comorbidity Binary 0.23 Not Balanced, >0.1 0.11 0.00 Balanced, <0.1 0.00
Starting Substance-Marijuana Binary 0.23 Not Balanced, >0.1 0.10 0.00 Balanced, <0.1 0.00
Region-Arica(15) Binary 0.21 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Starting Substance-Cocaine paste Binary 0.20 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Age at Admission Contin. -0.19 Not Balanced, >0.1 0.84 Balanced, <2 0.07 0.06 Balanced, <0.1 0.98 Balanced, <2 0.04
Freq Drug Cons-Less 1d/wk Binary -0.19 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Region-Tarapacá(01) Binary 0.16 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Sex-Women Binary 0.15 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Occ.Status- Not seeking work Binary 0.15 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Motive Admission-Justice Sector Binary -0.13 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Educational Attainment-PS or less Binary 0.12 Not Balanced, >0.1 0.06 0.00 Balanced, <0.1 0.00
Region-Araucanía(09) Binary -0.12 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Magallanes(12) Binary -0.12 Not Balanced, >0.1 0.01 0.00 Balanced, <0.1 0.00
Freq Drug Cons-4-6d/wk Binary -0.11 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Region-Antofagasta(02) Binary 0.11 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Coquimbo(04) Binary -0.10 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Aysén(11) Binary -0.09 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Ñuble(16) Binary -0.09 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Age of Onset of Drug Use Contin. -0.09 Balanced, <0.1 0.91 Balanced, <2 0.07 0.00 Balanced, <0.1 1.01 Balanced, <2 0.01
ICD-10-Dg. Unknown/under study Binary 0.08 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Freq Drug Cons-Did not use Binary -0.08 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Educational Attainment-HS or less Binary -0.07 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Educational Attainment-More than HS Binary -0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region- Biobío(08) Binary -0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Valparaíso(05) Binary 0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Los Lagos(10) Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Maule(07) Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Occ.Status-Inactive Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Motive Admission-Other Binary 0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Atacama(03) Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-O’Higgins(06) Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Motive Admission-Health Sector Binary -0.02 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Marital Status-Widower Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Occ.Status-Looking 1st job Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Starting Substance-Other Binary 0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Marital Status-Separated/Divorced Binary -0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Los Ríos(14) Binary -0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Metropolitana(13) Binary -0.01 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Starting Substance-Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Note. Unadjusted (n=84,776) ; Adjusted (n=22,916) ; Total pairs: 11,458


Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


We allowed to tolerate fech_ing_num (SMD=0.16), because the date of admission not necessarily had to be strictly balanced, assuming that not every user had to be admitted to treatment in exact dates.

Survival Setting

Bivariate

We selected the first treatments,


irrs<-function(x, y="event", z="person_days",db){
  #x= variable que agrupa
  #y= evento explicado
  #z= person days
  #db= base de datos
  fmla <- as.formula(paste0(y,"~",x))
  fmla2 <- as.formula(paste0(z,"~",x))
assign(paste0("irr_",y,"_por_",x),
       rateratio.test::rateratio.test(
     x=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)],
     n=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)]
    )
   )
return(
  rateratio.test::rateratio.test(
     x=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)],
     n=as.numeric(xtabs(fmla2, data=get(db)))[c(2,1)]
      )
    )
}
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
# CHECK  DUPLICATED ROWS
#CONS_C1_df_dup_SEP_2020%>% 
#  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% 
#  janitor::tabyl(condicion_ocupacional_corr)

# d_match %>% 
    #dplyr::group_by(row) %>% dplyr::mutate(rn_row=row_number()) %>% janitor::tabyl(rn_row)
#22,914

#
#d_match_surv %>% janitor::tabyl(duplicates_filtered,event)
#nrow(ids_matched)/2 =11,457

#CONS_SEP_match %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% janitor::tabyl(rn_hash)
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

ids_matched_filter<-
ids_matched %>% 
    dplyr::group_by(t_id_1) %>% 
    dplyr::mutate(rn_id=row_number()) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(rn_id==1)

ids_matched_rows<-cbind.data.frame("row_t"=CONS_SEP_match[c(t_id_1),"row"],
                        t_id_1,
                        "row_c"=CONS_SEP_match[c(c_id_1),"row"],
                        c_id_1) %>% 
  janitor::clean_names() %>% 
  dplyr::left_join(subset(ids_matched_filter,select=-c_id_1),by="t_id_1")

CONS_C1_df_dup_SEP_2020_irrs_health<-  
d_match %>% 
  dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","dias_treat_imp_sin_na", "event", "person_days","fech_egres_num", "person_years","diff_bet_treat")],by="row") %>%
  dplyr::left_join(ids_matched_rows, by=c("row")) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,NA)) %>% 
  dplyr::select(-rn_id,-group) %>% #glimpse()
  dplyr::rename("row_c"="row_2") %>% 
  dplyr::left_join(ids_matched_rows, by=c("row"="row_2")) %>% 
  dplyr::mutate(t_id_1=ifelse(!is.na(t_id_1.x),t_id_1.x,t_id_1.y)) %>% 
  dplyr::mutate(c_id_1=ifelse(!is.na(c_id_1.x),c_id_1.x,c_id_1.y)) %>% 
  dplyr::mutate(row_c=ifelse(!is.na(row_c),row_c,row.y)) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,group_match)) %>% 

  dplyr::select(-t_id_1.x,-c_id_1.x,-t_id_1.y,-c_id_1.y,-group,-row.y,-rn_id) %>% #glimpse()
  
  dplyr::mutate(res_drop_out=dplyr::case_when(
  tipo_de_plan_res==1 & abandono_temprano_rec==TRUE ~1,
  TRUE~0)) %>% 
  dplyr::mutate(min_ach=dplyr::case_when(
  evaluacindelprocesoteraputico=="3-Minimum Achievement" ~1,
  TRUE~0)) %>% 
  dplyr::mutate(res_drop_out=factor(res_drop_out)) %>% 
    dplyr::mutate(min_ach=factor(min_ach)) %>% 
  dplyr::mutate(status_censorship=dplyr::case_when(
  motivodeegreso_mod_imp=="Ongoing treatmentt" ~1,
  TRUE~0)) %>% 

  dplyr::mutate(outcome_to_readmission= dplyr::case_when(
                        event==1~ (diff_bet_treat)/365.25,# & grepl("",comp_status)
                        event==0~ (as.numeric(as.Date("2019-11-13"))-fech_egres_num)/365.25)) %>% 
  dplyr::mutate(admission_to_readmission= dplyr::case_when(
                        event==1~ (diff_bet_treat+dias_treat_imp_sin_na)/365.25,# & grepl("",comp_status)
                        event==0~ (as.numeric(as.Date("2019-11-13"))-fech_ing_num)/365.25))
  
# CONS_C1_df_dup_SEP_2020_irrs_health%>% janitor::tabyl(cnt_diagnostico_trs_fisico_irr)
#label(CONS_C1_df_dup_SEP_2020_prev4_explore$dg_fis_anemia) <- "Physical Dg. Anemia"
#   cnt_mod_cie_10_or cnt_otros_probl_at_sm_or

#22,914
#d_match %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% nrow()

#27 Y ALGO
#CONS_C1_df_dup_SEP_2020_irrs_health %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% nrow()

# HAY UN SEGUNDO TRATAMIENTO PARA 4,565 CASOS
#PARA VER SI HAY MAS DE UN CASO POR USUARIO
#CONS_C1_df_dup_SEP_2020_irrs_health %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% janitor::tabyl(rn_hash)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#irrs_min_ach & irrs_res_early- outcome to readmission  
irrs_early_drop<-irrs(x="abandono_temprano_rec", z="outcome_to_readmission", db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_res_plan<-irrs(x="tipo_de_plan_res" ,z="admission_to_readmission", db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_res_early<-irrs(x="res_drop_out" ,z="outcome_to_readmission", db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_min_ach<-irrs(x="min_ach" , z="outcome_to_readmission", db="CONS_C1_df_dup_SEP_2020_irrs_health")


The incidence rate of readmission was 0.97 (95% IC 0.91-1.03) in users that had at least an early dropout, compared with users that did not have a physical condition at baseline (p= 0.372).


Figure 12. Cum. Hazards to Experience Readmission to SUD Treatment, by Ealy Dropout of Treatment at Baseline

Figure 12. Cum. Hazards to Experience Readmission to SUD Treatment, by Ealy Dropout of Treatment at Baseline


The incidence rate of readmission was 1.49 (95% IC 1.42-1.56) in users that had a residential plan, compared with users that had an ambulatory plan at baseline (p<0.001).


Figure 13. Cum. Hazards to Experience Readmission to SUD Treatment, by Type of Plan at Baseline

Figure 13. Cum. Hazards to Experience Readmission to SUD Treatment, by Type of Plan at Baseline


The incidence rate of readmission was 1.24 (95% IC 1.15-1.34) in users that had a residential plan and an early dropout, compared with the rest of users at baseline (p<0.001).


Figure 14. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person in a Residential Treatment with an Early Dropout

Figure 14. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person in a Residential Treatment with an Early Dropout


The incidence rate of readmission was 1.16 (95% IC 1.1-1.22) in users that had a minimum achievement of the therapeutic goals, compared with the rest of users at baseline (p<0.001).


Figure 15. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person had a Minimum Achievement in Therapeutic Goals

Figure 15. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person had a Minimum Achievement in Therapeutic Goals


Multivariate


Inference for the regression coefficients is based on a within-pair treatment effect.


#The stratified Cox model can be used to perform Cox regression on matched designs by using stratification but it can also be done by modeling with frailties

#Some believe that accounting for the matching isn't necessary at all, since it doesn't affect beta coefficients materially and the variables which you have matched on can simply be adjusted for as covariates in the model; this is sufficient in most cases.

#A matched cohort study involves pairs (or clusters in case several untreated subjects are matched with each of the treated individuals) formed to include individuals who differ with respect to treatment but may be matched on certain baseline characteristics.

# Two common methods for analyzing paired/clustered survival data involve a stratified and a marginal Cox model, which represent 2 different approaches of accounting for potential correlation between paired outcomes (for discussion see Glidden and Vittinghoff [5]).

#A regression model is often a more powerful tool in detecting treatment effect than a matched study.

#Choices in study design are regression modeling or matched-pairs study.

#Brazauskas, R., & Logan, B. R. (2016). Observational Studies: Matching or Regression? Biology of Blood and Marrow Transplantation, 22(3), 557–563. doi:10.1016/j.bbmt.2015.12.005 



#simple expression of the common HR estimator would be a useful summary of exposure effect

#Shinozaki, T., Mansournia, M. A., & Matsuyama, Y. (2017). On hazard ratio estimators by proportional hazards models in matched-pair cohort studies. Emerging themes in epidemiology, 14, 6. https://doi.org/10.1186/s12982-017-0060-8

# "The covariate effects are so odd that we'll never model them correctly, so treat each combination as unique."The data set two needs to have each treated subject + their controls in a separate stratum - Terry Therneau

#Stratified approach
#For each pair, there is an unspecified baseline hazard function. The partial likelihood idea is readily adapted by multiplying the partial likelihoods specific to each stratum.
##Pros: Lack of structure. Cons: It does not provide any information about heterogeneity between pairs; Pairs in which both members shared the same covariate information or which provide only censoring observations do not contribute to the likelihood; this is because no between-pair comparisons are attempted. Heterogeneity is not described by a single parameter as frailty;

# Austin PC. A critical appraisal of propensity-score matching in the medical literature between 1996 and 2003. STATISTICS IN MEDICINE. Statist. Med. 2008; 27:2037–2049

#https://www.duo.uio.no/bitstream/handle/10852/10289/stat-res-11-97.pdf?sequence=1&isAllowed=y

memory.limit(size = 20000)
## [1] 32638
#Classical stratified tests

#This statistics reduces to the difference in the number of events in the 2 samples which occurr while both patients in the pair are at risk given the appropiate weight. 

#Klein, J. & Moeschberger, M. (2003) Survival Analysis: Statistical Methods for Censored and Truncated Data. 2nd Edition. Springer-Verlag. 

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
m1 <- coxph(Surv(diff_bet_treat,event) ~ strata(group_match) + tipo_de_plan_res, data = CONS_C1_df_dup_SEP_2020_irrs_health)

summary(m1)
## Call:
## coxph(formula = Surv(diff_bet_treat, event) ~ strata(group_match) + 
##     tipo_de_plan_res, data = CONS_C1_df_dup_SEP_2020_irrs_health)
## 
##   n= 6398, number of events= 6398 
##    (16518 observations deleted due to missingness)
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)    
## tipo_de_plan_res 0.34731   1.41525  0.06943 5.002 5.67e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## tipo_de_plan_res     1.415     0.7066     1.235     1.622
## 
## Concordance= 0.586  (se = 0.024 )
## Likelihood ratio test= 25.4  on 1 df,   p=5e-07
## Wald test            = 25.02  on 1 df,   p=6e-07
## Score (logrank) test = 25.27  on 1 df,   p=5e-07
cox.zph(m1)#Possibly, a log-normal or log-logistic AFT model would fit better than Cox.
##                  chisq df     p
## tipo_de_plan_res  9.52  1 0.002
## GLOBAL            9.52  1 0.002
m1b <- try_with_time_limit(
            survreg(Surv(diff_bet_treat+1,event)~ strata(group_match)+ tipo_de_plan_res,data=CONS_C1_df_dup_SEP_2020_irrs_health, dist="weibull"),
        elapsed = 60)
        
#The survreg function in R does not allow time = 0. This is because for several of the distributions, including the lognormal distribution, having events occur at time = 0 will result in an undefined estimator.
(m1b)

m2 <- eval_fork(
        coxph(Surv(diff_bet_treat,event) ~ frailty(group_match, 
          distribution = "gaussian", sparse = FALSE, method = "reml") + tipo_de_plan_res, 
          data = CONS_C1_df_dup_SEP_2020_irrs_health),
      timeout = 60)
summary(m2)

cox.zph(m2)
#CONS_C1_df_dup_SEP_2020$condicion_ocupacional_corr CONS_C1_df_dup_SEP_2020$cnt_diagnostico_trs_fisico CONS_C1_df_dup_SEP_2020$tenencia_de_la_vivienda_mod

##COx Diagnostics
#ggcoxzph(cox.zph(m1))
#ggcoxdiagnostics(m1, type = "dfbeta",
#                 linear.predictions = FALSE, ggtheme = theme_bw())
#ggcoxdiagnostics(m1, type = "deviance",
#                 linear.predictions = FALSE, ggtheme = theme_bw())
#It’s also possible to check outliers by visualizing the deviance residuals. The deviance residual is a normalized transform of the martingale residual. These residuals should be roughtly symmetrically distributed about zero with a standard deviation of 1.
#Positive values correspond to individuals that “died too soon” compared to expected survival times.
#Negative values correspond to individual that “lived too long”.
#Very large or small values are outliers, which are poorly predicted by the model.

#grid.arrange(
#  ggforest(m1, data=CONS_C1_df_dup_SEP_2020_irrs_health),
#  ggforest(m2, data=CONS_C1_df_dup_SEP_2020_irrs_health),
#  ncol=2
#)


There was evidence of not proportional hazards. Users in residential treatments experience 42% within the study period than users in outpatient treatments (95% CI: 24% - 62%; p=0).


Multistate


#  dplyr::filter(motivodeegreso_mod_imp!="En curso")%>% #Sacar los tratamientos que estén en curso 


tab1_lab<- paste0('Original C1 Dataset \n(n = ', formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1%>% dplyr::distinct(HASH_KEY)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab2_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab1_5_lab<- paste0('&#8226; Duplicated entries\\l &#8226; Overlapping treatments of users\\l &#8226; Intermediate events of treatment (continuous referrals)')
tab4_lab<- paste0('Imputed C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab3_5_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab6_lab<- paste0('C1 Matched Sample\nin Treatment Setting \n(n = ', formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% nrow(), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')

lab_tab<- paste0("  Result of the matching on treatment setting\nNo. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))

#https://stackoverflow.com/questions/46750364/diagrammer-and-graphviz
#https://mikeyharper.uk/flowcharts-in-r-using-diagrammer/
#http://blog.nguyenvq.com/blog/2012/05/29/better-decision-tree-graphics-for-rpart-via-party-and-partykit/
#http://blog.nguyenvq.com/blog/2014/01/17/skeleton-to-create-fast-automatic-tree-diagrams-using-r-and-graphviz/
#https://cran.r-project.org/web/packages/DiagrammeR/vignettes/graphviz-mermaid.html
#https://stackoverflow.com/questions/39133058/how-to-use-graphviz-graphs-in-diagrammer-for-r
#https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781789802566/1/ch01lvl1sec21/creating-diagrams-via-the-diagrammer-package
#https://justlegal.be/2019/05/using-flowcharts-to-display-legal-procedures/
# paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
#
library(DiagrammeR) #⋉
grViz("digraph flowchart {
      # node definitions with substituted label text
      node [fontname = Times, shape = rectangle,fontsize = 9]        
      tab1 [label = '@@1']
      tab2 [label = '@@2']
      tab3 [label = '&#8226;Duplicated entries\\l&#8226;Intermediate events of treatment (continuous referrals)\\l',fontsize = 7]
      tab4 [label = '@@4']
      blank [label = '', width = 0.0001, height = 0.0001]
      blank2 [label = '', width = 0.0001, height = 0.0001]
      blank3 [label = '', width = 0.0001, height = 0.0001]
      tab5 [label = '&#8226;Logically Inconsistent candidates for imputation\\l&#8226;Ties in candidates for imputation\\l',fontsize = 7]
      tab6 [label= '@@6']
      tab7 [label = '&#8226;Matching pairs based on balance of covariates at basline,\\l&#8226;Pairs 1:1\\l',fontsize = 7]
      
      # edge definitions with the node IDs
      tab1 -> blank [arrowhead = none,label='  Data wrangling and normalization process',fontsize = 8];
      blank -> tab3
      blank -> tab2
      tab2 -> blank2 [arrowhead = none];
      blank2 -> tab5 
      blank2 -> tab4 [label='  Result of the imputation of missing values',fontsize = 8];
      tab4 -> blank3 [arrowhead= none];
      blank3-> tab7
      blank3 -> tab6 [label='@@7',fontsize = 8];
            subgraph {
              rank = same; tab3; blank;
            }
            subgraph {
              rank = same; tab5; blank2;
            }
            subgraph {
              rank = same; tab7; blank3;
            }
      }

      [1]:  tab1_lab
      [2]:  tab2_lab
      [3]:  tab1_5_lab
      [4]:  tab4_lab
      [5]:  ''
      [6]:  tab6_lab
      [7]:  lab_tab
      ")
#      {rank=same; 'tab2'' -> tab3 [label='',fontsize = 11]}; #⋉
#CONS_C1_df_dup_SEP_2020_irrs_health
Table 6. Summary descriptives table
Variables Ambulatory Residential Sig.
N=17154 N=15122
Motive of Admission to Treatment (First Entry): <0.001
Spontaneous 6994 (40.8%) 5567 (36.8%)
Assisted Referral 2940 (17.1%) 3087 (20.4%)
Other 954 (5.56%) 919 (6.08%)
Justice Sector 1229 (7.16%) 985 (6.51%)
Health Sector 5037 (29.4%) 4564 (30.2%)
Psychiatric Comorbidity: <0.001
Without psychiatric comorbidity 4719 (27.5%) 3845 (25.4%)
Diagnosis unknown (under study) 3510 (20.5%) 3305 (21.9%)
With psychiatric comorbidity 8925 (52.0%) 7972 (52.7%)
Sexo Usuario/Sex of User: 0.069
Men 11433 (66.6%) 10224 (67.6%)
Women 5721 (33.4%) 4898 (32.4%)
Age at Admission to Treatment 32.7 [26.7;40.7] 33.0 [26.9;41.0] 0.045
Treatment Length (>90): <0.001
FALSE 14035 (81.8%) 12088 (79.9%)
TRUE 3119 (18.2%) 3028 (20.0%)
‘Missing’ 0 (0.00%) 6 (0.04%)
Treatments by User (#): 0.006
1 8857 (51.6%) 7661 (50.7%)
2 4666 (27.2%) 4108 (27.2%)
3 2172 (12.7%) 1920 (12.7%)
4 924 (5.39%) 840 (5.55%)
5 316 (1.84%) 354 (2.34%)
6 157 (0.92%) 155 (1.02%)
7 44 (0.26%) 54 (0.36%)
8 18 (0.10%) 30 (0.20%)
More than one treatment: 0.084
0 8857 (51.6%) 7661 (50.7%)
1 8297 (48.4%) 7461 (49.3%)
Starting Substance: <0.001
Alcohol 7450 (43.4%) 6204 (41.0%)
Cocaine hydrochloride 746 (4.35%) 637 (4.21%)
Cocaine paste 2498 (14.6%) 2377 (15.7%)
Marijuana 6036 (35.2%) 5521 (36.5%)
Other 424 (2.47%) 383 (2.53%)
Marital Status: <0.001
Married/Shared living arrangements 4275 (24.9%) 3473 (23.0%)
Separated/Divorced 1874 (10.9%) 1565 (10.3%)
Single 10819 (63.1%) 9936 (65.7%)
Widower 186 (1.08%) 148 (0.98%)
Educational Attainment: 0.006
3-Completed primary school or less 5239 (30.5%) 4843 (32.0%)
2-Completed high school or less 8912 (52.0%) 7775 (51.4%)
1-More than high school 3003 (17.5%) 2504 (16.6%)
Frequency of use of primary drug: <0.001
1 day a week or more 592 (3.45%) 327 (2.16%)
2 to 3 days a week 2504 (14.6%) 1570 (10.4%)
4 to 6 days a week 2447 (14.3%) 1972 (13.0%)
Daily 10879 (63.4%) 10993 (72.7%)
Did not use 340 (1.98%) 106 (0.70%)
Less than 1 day a week 392 (2.29%) 154 (1.02%)
Public Center: <0.001
FALSE 9904 (57.7%) 10750 (71.1%)
TRUE 7250 (42.3%) 4372 (28.9%)
Minimum Achievement in the Therapeutic Process: <0.001
Ongoing treatment 1164 (6.79%) 660 (4.36%)
Minimum achievement 8436 (49.2%) 6190 (40.9%)
High/Medium achievement 7554 (44.0%) 8272 (54.7%)
Drug Dependence: <0.001
FALSE 2077 (12.1%) 1316 (8.70%)
TRUE 15077 (87.9%) 13806 (91.3%)
Age of Onset of Drug Use 15.0 [14.0;17.0] 15.0 [13.0;17.0] 0.031
Occupational Status: <0.001
Employed 3816 (22.2%) 2011 (13.3%)
Inactive 1881 (11.0%) 1539 (10.2%)
Looking for a job for the first time 32 (0.19%) 23 (0.15%)
No activity 1858 (10.8%) 2134 (14.1%)
Not seeking for work 350 (2.04%) 400 (2.65%)
Unemployed 9217 (53.7%) 9015 (59.6%)
Days of Treatment (missing dates of discharge were replaced with difference from 2019-11-13) 153 [84.0;276] 151 [66.0;277] <0.001
Users with Posterior Treatments (=1): 0.084
0 8857 (51.6%) 7661 (50.7%)
1 8297 (48.4%) 7461 (49.3%)
User’s Days available in the system for the study 408 [146;1175] 401 [152;1093] 0.018
User’s Years available in the system for the study 1.12 [0.40;3.22] 1.10 [0.42;2.99] 0.018
Days of difference between the Next Treatment 347 [137;780] 263 [72.0;692] <0.001
Treatment Successful Completion: <0.001
Ongoing treatment 1164 (6.79%) 660 (4.36%)
Completion 3150 (18.4%) 4372 (28.9%)
Non-completion 12840 (74.9%) 10090 (66.7%)
Cause of Discharge: <0.001
Administrative discharge 1475 (8.60%) 1902 (12.6%)
Early Drop-out 3119 (18.2%) 3028 (20.0%)
Late Drop-out 6047 (35.3%) 2976 (19.7%)
Ongoing treatment 1164 (6.79%) 660 (4.36%)
Referral to another treatment 2199 (12.8%) 2184 (14.4%)
Therapeutic discharge 3150 (18.4%) 4372 (28.9%)
Note. Variables of C1 dataset had to be standardized before comparison;
Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;
Categorical variables are presented as number (%)


After matching, we selected 32,276 treatments (users=22,916).


library(Epi)
#For censored state transitions it can be awkward having to replicate the censoring time for each non-visited state
#https://github.com/stulacy/multistateutils
states_trans<-c("Admission",    "TD",   "DWCA", "Readmission",  "TD2", "DWCA2", "Readmission2", "TD3",  "DWCA3",    "Readmission3")

trans_matrix <- matrix(c(
NA,1,2,3,NA,NA,NA,NA,NA,NA,
NA,NA,NA,4,NA,NA,NA,NA,NA,NA,
NA,NA,NA,5,NA,NA,NA,NA,NA,NA,
NA,NA,NA,NA,6,7,8,NA,NA,NA,
NA,NA,NA,NA,NA,NA,9,NA,NA,NA,
NA,NA,NA,NA,NA,NA,10,NA,NA,NA,
NA,NA,NA,NA,NA,NA,NA,11,12,13,
NA,NA,NA,NA,NA,NA,NA,NA,NA,14,
NA,NA,NA,NA,NA,NA,NA,NA,NA,15,
NA,NA,NA,NA,NA,NA,NA,NA,NA,NA
                          ), nrow=10, ncol=10,
                          byrow=TRUE,
                          dimnames=list(from=states_trans,to=states_trans))

#All possible paths through the multi-state model can be found here:
#paths(mat_3_states)
boxes.Lexis(trans_matrix, wmult=1.3, hmult=1.3, cex=.9,
             boxpos = list(x = c(14.28571429/2, rep(14.28571429*2-14.28571429/2,2), 14.28571429*3-14.28571429/2, rep(14.28571429*4-14.28571429/2,2), 14.28571429*5-14.28571429/2, rep(14.28571429*6-14.28571429/2,2), 14.28571429*7-14.28571429/2), 
                           y = c(rep(c(50, 80, 20),3),50)),
            txt.arr=c(expression(" 1) " *lambda['01']), 
                      expression(" 2) " *lambda['02']),
                      expression(" 3) " *lambda['03']),
                      expression(" 4) " *lambda['13']),
                      expression(" 5) " *lambda['23']),
                      expression(" 6) " *lambda['34']),
                      expression(" 7) " *lambda['35']),
                      expression(" 8) " *lambda['36']),
                      expression(" 9) " *lambda['46']),
                      expression(" 10) " *lambda['56']),
                      expression(" 11) " *lambda['67']),
                      expression(" 12) " *lambda['68']),
                      expression(" 13) " *lambda['69']),
                      expression(" 14) " *lambda['79']),
                      expression(" 15) " *lambda['89'])
                      ))
title(sub = "No recurring states; Absorving state: Third Readmission (97.1% of the registries, considering that each registry had a time-to-readmission);\nOther causes of discharge were not events of interest") ## internal titles


To the first and second states, we subtracted one day if it overlaps with the date of discharge. For the third and the following states, we added one day in case of overlapping dates due to continous treatments.


### diff_bet_treat is the variable that includes time-to-readmission
### AGS: Parten en 0, salvo que estén truncados a la izquierda. 
### variables should start with time_ & status_
### Transform to years once generated
### Parece que todos comparten un mismo tiempo ojetivo.
### AGS: Cuando hay un estado seguido no es necesario interval censoring, se dn en tun tiempo continuo
### 0's are censored status
### 2021-03-24: Eliminate cases with readmissions posterior to interval-censored discharges
library(mstate)

d_match_surv_msprep<-
  d_match_surv %>% 
  dplyr::select(id, duplicates_filtered, fech_ing_num,fech_egres_num,dias_treat_imp_sin_na,fech_ing_next_treat,tipo_de_plan_res,motivodeegreso_mod_imp,min_achievement,group_match,dup) %>% 
  ## Filter cases with 4 or more registries
  #sum(prop.table(table(d_match_surv$dup))[1:3])
  dplyr::filter(dup<4) %>% 
  #dplyr::mutate(tipo_de_plan_res_baseline=tipo_de_plan_res) %>% 
  tidyr::pivot_wider(id_cols=c("id","group_match","duplicates_filtered"), names_from=dup, names_sep="_", values_from=c("fech_ing_num","fech_egres_num","dias_treat_imp_sin_na","fech_ing_next_treat","tipo_de_plan_res","motivodeegreso_mod_imp","min_achievement")) %>% 
  #,"tipo_de_plan_res_baseline"
  dplyr::arrange(id) %>%
  dplyr::select(id, group_match,duplicates_filtered,everything()) %>% 
  #display error if there are more than row per user
  purrr::when(dplyr::group_by(.,id) %>% dplyr::count() %>% filter(n>1) %>% nrow()>0 ~ stop("more than one cases by row"), 
              ~.) %>% 
  #22,926 x 20
#_#_#_#_#_#_#_#_#_
## 1st STATE
  dplyr::mutate(TD_status=if_else(motivodeegreso_mod_imp_1=="Therapeutic discharge",1,0,0)) %>% 
  # For problems with 0 intervals between a new treatment and other between the admission and discharge, I subtract 1 day of admission. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
  # There are no missing values for the dates of discharge, so we use them
  dplyr::mutate(cambio_fecha_ing_1= dplyr::case_when(fech_egres_num_1-fech_ing_num_1==0~1 ,T~0)) %>%
  dplyr::mutate(fech_ing_num_1= dplyr::case_when(cambio_fecha_ing_1==1~fech_ing_num_1-1,T~fech_ing_num_1)) %>% 
  dplyr::mutate(dias_treat_imp_sin_na_1= dplyr::case_when(cambio_fecha_ing_1==1~fech_egres_num_1-fech_ing_num_1, T~fech_egres_num_1-fech_ing_num_1)) %>% 
  #If status=1, the corresponding transition has been observed. If not, it assumes not observed
#_#_#_#_#_#_#_#_#_
## 2nd STATE
  dplyr::mutate(DWCA_status=if_else(motivodeegreso_mod_imp_1 %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0,0)) %>% 
#_#_#_#_#_#_#_#_#_
## 3rd STATE- Readmission
  # Time of arrival at the state of readmission: diff_bet_treat_1=fech_ing_next_treat_1-fech_egres_num_1
  # For problems with 0 intervals between a new treatment and other between the admission and discharge, I add 1 day of admission to the next treatment. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
  dplyr::mutate(Readmission_status=if_else(!is.na(fech_ing_next_treat_1),1,0,0)) %>%
  dplyr::mutate(cambio_fecha_ing_readm_1= dplyr::case_when(fech_ing_next_treat_1-fech_egres_num_1==0~1 ,T~0)) %>%
  #around 100 users
  dplyr::mutate(fech_ing_next_treat_1= dplyr::case_when(cambio_fecha_ing_readm_1==1~fech_ing_next_treat_1+1,T~fech_ing_next_treat_1)) %>% 
  dplyr::mutate(diff_bet_treat_1= fech_ing_next_treat_1-fech_egres_num_1) %>% 
  #dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#_#_#_#_#_#_#_#_#_
## 1st & 2nd STATE
## ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED. in all the available period of the user, the status was not observed
#_#_#_#_#_#_#_#_#_
  dplyr::mutate(TD_time= dplyr::case_when(TD_status==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num_1,
                                          T~fech_egres_num_1-fech_ing_num_1)) %>% 
  dplyr::mutate(DWCA_time= dplyr::case_when(DWCA_status==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num_1,
                                          T~fech_egres_num_1-fech_ing_num_1)) %>% 
    purrr::when(dplyr::filter(.,fech_egres_num_1-fech_ing_num_1<=0) %>% nrow()>0 ~ stop("There are still treatments with negative length or 0 days"), 
              ~.) %>%
#_#_#_#_#_#_#_#_#_
## 4th STATE  
  dplyr::mutate(TD2_status=if_else(motivodeegreso_mod_imp_2=="Therapeutic discharge",1,0,0)) %>% 
  # For problems with 0 intervals between a new treatment and other between the admission and discharge, I subtract 1 day of admission. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
  # There are no missing values for the dates of discharge, so we use them
  # check cases where the date of admission differed more than one day to the day of the next treat, to follow the transformation made in previous states
  purrr::when(dplyr::mutate(.,diff_bet_readm_ing2= fech_ing_next_treat_1-fech_ing_num_2) %>%  dplyr::filter(!diff_bet_readm_ing2 %in% c(1,0,NA)) %>% nrow()>0 ~ stop("Differences different from 0 or one with a varaible that should be the same"), 
              ~.) %>% 
  #correct dates for the ones that had changed due to the adjustment of the 3rd state
  dplyr::mutate(fech_ing_num_2=dplyr::case_when(cambio_fecha_ing_readm_1==1~ fech_ing_next_treat_1,T~fech_ing_num_2)) %>% 
  dplyr::mutate(cambio_fecha_ing_2= dplyr::case_when(fech_egres_num_2-fech_ing_num_2==0~1 ,T~0)) %>%
# different for the 1st state, I needed to add one day more  NOW TO THE DATE OF DISCHARGE
  dplyr::mutate(fech_egres_num_2= dplyr::case_when(cambio_fecha_ing_2==1~fech_egres_num_2+1,T~fech_egres_num_2)) %>% 
  dplyr::mutate(dias_treat_imp_sin_na_2= fech_egres_num_2-fech_ing_num_2) %>% 
  purrr::when(dplyr::filter(.,dias_treat_imp_sin_na_2<=0) %>% nrow()>0 ~ stop("There are still treatments overlapped or wrong placed"), 
              ~.) %>%
#_#_#_#_#_#_#_#_#_
## 5th STATE
  dplyr::mutate(DWCA2_status=ifelse(motivodeegreso_mod_imp_2 %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0)) %>% 
#_#_#_#_#_#_#_#_#_
## 6th STATE- Readmission2
  # For problems with 0 intervals between a new treatment and other between the admission and discharge, I add 1 day of admission to the next treatment. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
  dplyr::mutate(Readmission2_status=if_else(!is.na(fech_ing_next_treat_2),1,0,0)) %>%
  # In case of two continuous treatments, we may have a date of discharge that had a difference 0 with admission, but having added previously a day would end having a difference of minus 1
  dplyr::mutate(cambio_fecha_ing_readm_2= dplyr::case_when(fech_ing_next_treat_2-fech_egres_num_2==0~1, fech_ing_next_treat_2-fech_egres_num_2==-1~2, T~0)) %>%
  dplyr::mutate(fech_ing_next_treat_2= dplyr::case_when(cambio_fecha_ing_readm_2==1~fech_ing_next_treat_2+1,cambio_fecha_ing_readm_2==2~fech_ing_next_treat_2+2,T~fech_ing_next_treat_2)) %>% 
  dplyr::mutate(diff_bet_treat_2= fech_ing_next_treat_2-fech_egres_num_2) %>% 
  purrr::when(dplyr::filter(.,diff_bet_treat_2<=0) %>% nrow()>0 ~ stop("The end of the treatment overlapped or is misplaced from the next treatment"), 
              ~.) %>%
#_#_#_#_#_#_#_#_#_
## 4th & 5th STATE
## ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED,in all the available period of the user, the status was not observed
#_#_#_#_#_#_#_#_#_
  dplyr::mutate(TD2_time= dplyr::case_when(TD2_status==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num_1,
                                           T~fech_egres_num_2-fech_ing_num_2)) %>% 
  dplyr::mutate(DWCA2_time= dplyr::case_when(DWCA2_status==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num_1,
                                           T~fech_egres_num_2-fech_ing_num_2)) %>% 
  #dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#_#_#_#_#_#_#_#_#_
## 7th STATE  
  dplyr::mutate(TD3_status=if_else(motivodeegreso_mod_imp_3=="Therapeutic discharge",1,0,0)) %>% 
  # For problems with 0 intervals between a new treatment and other between the admission and discharge, I subtract 1 day of admission. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
  # There are no missing values for the dates of discharge, so we use them
  # check cases where the date of admission differed more than one day to the day of the next treat, to follow the transformation made in previous states
  # For this state, we added differences of 2 as admissible.
  purrr::when(dplyr::mutate(.,diff_bet_readm_ing3= fech_ing_next_treat_2-fech_ing_num_3)%>% dplyr::filter(!diff_bet_readm_ing3 %in% c(2,1,0,NA))%>% nrow()>0 ~ stop("There are cases with differences different than 0 to 2 days to a variable that should be the same"), 
              ~.) %>%
  #correct dates for the ones that had changed due to the adjustment of the 5th state
  dplyr::mutate(fech_ing_num_3=dplyr::case_when(cambio_fecha_ing_readm_2==1~ fech_ing_next_treat_2,T~fech_ing_num_3)) %>% 
  purrr::when(dplyr::mutate(.,diff_bet_readm_ing3= fech_ing_next_treat_2-fech_ing_num_3)%>% dplyr::filter(!diff_bet_readm_ing3 %in% c(0,NA))%>% nrow()>0 ~ stop("There are cases with differences different than 0 days of a variable that should be equal"), 
              ~.) %>%
  
  dplyr::mutate(cambio_fecha_ing_3= dplyr::case_when(fech_egres_num_3-fech_ing_num_3==0~1,fech_egres_num_3-fech_ing_num_3==-1~2,T~0)) %>%
# different for the 1st state, I needed to add one day more NOW TO THE DATE OF DISCHARGE
  dplyr::mutate(fech_egres_num_3= dplyr::case_when(cambio_fecha_ing_3==1~fech_egres_num_3+1,cambio_fecha_ing_3==2~fech_egres_num_3+2,T~fech_egres_num_3)) %>% 
  dplyr::mutate(dias_treat_imp_sin_na_3= fech_egres_num_3-fech_ing_num_3) %>% 
  purrr::when(dplyr::filter(.,dias_treat_imp_sin_na_3<=0) %>% nrow()>0 ~ stop("There are still treatments overlapped or wrong placed"), 
              ~.) %>%  
#_#_#_#_#_#_#_#_#_
## 8th STATE
  dplyr::mutate(DWCA3_status=if_else(motivodeegreso_mod_imp_3 %in% c("Early Drop-out","Late Drop-out","Administrative discharge"),1,0)) %>% 
#_#_#_#_#_#_#_#_#_
## 9th STATE- Readmission3
  # For problems with 0 intervals between a new treatment and other between the admission and discharge, I add 1 day of admission to the next treatment. Then, and if there is a change in the date, we replace it. If not, we maintain the original value
  dplyr::mutate(Readmission3_status=if_else(!is.na(fech_ing_next_treat_3),1,0,0)) %>%
  # In case of two continuous treatments, we may have a date of discharge that had a difference 0 with admission, but having added previously a day would end having a difference of minus 1, and minus 2
  dplyr::mutate(cambio_fecha_ing_readm_3= dplyr::case_when(fech_ing_next_treat_3-fech_egres_num_3==0~1, fech_ing_next_treat_3-fech_egres_num_3==-1~2,fech_ing_next_treat_3-fech_egres_num_3==-2~3, T~0)) %>%
  dplyr::mutate(fech_ing_next_treat_3= dplyr::case_when(cambio_fecha_ing_readm_3==1~fech_ing_next_treat_3+1,cambio_fecha_ing_readm_3==2~fech_ing_next_treat_3+2,cambio_fecha_ing_readm_3==2~fech_ing_next_treat_3+3,T~fech_ing_next_treat_3)) %>% 
  dplyr::mutate(diff_bet_treat_3= fech_ing_next_treat_3-fech_egres_num_3) %>% 
  purrr::when(dplyr::filter(.,diff_bet_treat_3<=0) %>% nrow()>0 ~ stop("The end of the treatment overlapped or is misplaced from the next treatment"), 
              ~.) %>%
  dplyr::rename("Readmission_time"="diff_bet_treat_1","Readmission2_time"="diff_bet_treat_2","Readmission3_time"="diff_bet_treat_3") %>% 
 #_#_#_#_#_#_#_#_#_
## 7th & 8th STATE
## ADD DATES TO THE FINAL FOLLOW UP OF THE STUDY IF CENSORED,in all the available period of the user, the status was not observed
#_#_#_#_#_#_#_#_#_
  dplyr::mutate(TD3_time= dplyr::case_when(TD3_status==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num_1,
                                           T~fech_egres_num_3-fech_ing_num_3)) %>% 
  dplyr::mutate(DWCA3_time= dplyr::case_when(DWCA3_status==0~ as.numeric(as.Date("2019-11-13"))-fech_ing_num_1,
                                           T~fech_egres_num_3-fech_ing_num_3)) %>% 
  #dplyr::filter(diff_bet_treat_corr!=diff_bet_treat)
#_#_#_#_#_#_#_#_#_, 
 #TD_status, DWCA_status, Readmission_status, TD2_status, DWCA2_status, Readmission2_status, TD3_status, DWCA3_status, Readmission3_status
 #TD_time, DWCA_time, Readmission_time, TD2_time, DWCA2_time, Readmission2_time, TD3_time, DWCA3_time, Readmission3_time
#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_
  
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
## 2021-03-24, I had to reespecify times to objective times, in order to avoid further problems
## 2021-03-24, The posterior treatment has to include the days in the previous treatment
## 2021-05-05, define dates for intermediate states
## 2021-05-06, CENSORED TIME IS NOT THE DIFFERENCE BETWEEN THE TIME OF CENSORSIP AND THE TIME OF THE LAST EVENT, IS THE TOTAL DIFFERENCE. THE SUM OF DAYS UNTIL THE FOLLOWUP TIME
  dplyr::mutate( 
  Readmission_time= dplyr::case_when(
        TD_status==1 & Readmission_status==1~ TD_time+Readmission_time,
        DWCA_status==1 & Readmission_status==1~ DWCA_time+Readmission_time,
        TD_status==0 & DWCA_status==0 & Readmission_status==1~ fech_ing_next_treat_1-fech_ing_num_1,
        Readmission_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
        TD2_time= dplyr::case_when(TD2_status==1~ TD2_time+Readmission_time,T~TD2_time), 
        DWCA2_time= dplyr::case_when(DWCA2_status==1~ DWCA2_time+Readmission_time,T~DWCA2_time)) %>% 
  dplyr::mutate( 
  Readmission2_time= dplyr::case_when(
        TD2_status==1 & Readmission2_status==1~ TD2_time+Readmission2_time,
        DWCA2_status==1 & Readmission2_status==1~ DWCA2_time+Readmission2_time,
        TD2_status==0 & DWCA2_status==0 & Readmission2_status==1~ fech_ing_next_treat_2-fech_ing_num_1,
        Readmission2_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
  dplyr::mutate( 
        TD3_time= dplyr::case_when(TD3_status==1~ TD3_time+Readmission2_time,T~TD3_time), 
        DWCA3_time= dplyr::case_when(DWCA3_status==1~ DWCA3_time+Readmission2_time,T~DWCA3_time)) %>% 
  dplyr::mutate( 
  Readmission3_time= dplyr::case_when(
        TD3_status==1 & Readmission3_status==1~ TD3_time+Readmission3_time,
        DWCA3_status==1 & Readmission3_status==1~ DWCA3_time+Readmission3_time,
        TD3_status==0 & DWCA3_status==0 & Readmission3_status==1~ fech_ing_next_treat_3-fech_ing_num_1,
        Readmission3_status==0~as.numeric(as.Date("2019-11-13"))-fech_ing_num_1)) %>% 
    
    ## THE USERS THAT  DID NOT REGISTERED AN EVENT WILL COME UP TO THE FINAL TIME OF THE FOLLOW UP
     # dplyr::select(
     # id, group_match,TD_status, DWCA_status, Readmission_status, TD2_status, DWCA2_status, Readmission2_status, TD3_status, DWCA3_status, Readmission3_status,
     # TD_time, DWCA_time, Readmission_time, TD2_time, DWCA2_time, Readmission2_time, TD3_time, DWCA3_time, Readmission3_time,
     # tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3,min_achievement_1,min_achievement_2,min_achievement_3) %>% 
  #tipo_de_plan_res_baseline♦
  as.data.frame() 

#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:#:#:#:##:#:
tail(d_match_surv_msprep) %>% 
      knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 8. Data in Wide, Ten-states",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 13)%>% 
  kableExtra::add_footnote("Note= Proportions from the initial state") %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 8. Data in Wide, Ten-states
id group_match duplicates_filtered fech_ing_num_1 fech_ing_num_2 fech_ing_num_3 fech_egres_num_1 fech_egres_num_2 fech_egres_num_3 dias_treat_imp_sin_na_1 dias_treat_imp_sin_na_2 dias_treat_imp_sin_na_3 fech_ing_next_treat_1 fech_ing_next_treat_2 fech_ing_next_treat_3 tipo_de_plan_res_1 tipo_de_plan_res_2 tipo_de_plan_res_3 motivodeegreso_mod_imp_1 motivodeegreso_mod_imp_2 motivodeegreso_mod_imp_3 min_achievement_1 min_achievement_2 min_achievement_3 TD_status cambio_fecha_ing_1 DWCA_status Readmission_status cambio_fecha_ing_readm_1 Readmission_time TD_time DWCA_time TD2_status cambio_fecha_ing_2 DWCA2_status Readmission2_status cambio_fecha_ing_readm_2 Readmission2_time TD2_time DWCA2_time TD3_status cambio_fecha_ing_3 DWCA3_status Readmission3_status cambio_fecha_ing_readm_3 Readmission3_time TD3_time DWCA3_time
22911 22,911 11,451 1 16,036 16,065 29 1 Early Drop-out Minimum achievement 0 0 1 0 0 2,177 2,177 29 0 0 0 0 0 2,177 2,177 2,177 0 0 0 0 0 2,177 2,177 2,177
22912 22,912 11,452 1 15,426 15,660 234 1 Therapeutic discharge High/Medium achievement 1 0 0 0 0 2,787 234 2,787 0 0 0 0 0 2,787 2,787 2,787 0 0 0 0 0 2,787 2,787 2,787
22913 22,913 11,454 1 16,315 16,786 471 1 Therapeutic discharge High/Medium achievement 1 0 0 0 0 1,898 471 1,898 0 0 0 0 0 1,898 1,898 1,898 0 0 0 0 0 1,898 1,898 1,898
22914 22,914 11,456 1 16,224 16,513 289 1 Therapeutic discharge High/Medium achievement 1 0 0 0 0 1,989 289 1,989 0 0 0 0 0 1,989 1,989 1,989 0 0 0 0 0 1,989 1,989 1,989
22915 22,915 11,457 1 17,688 17,744 56 1 Early Drop-out Minimum achievement 0 0 1 0 0 525 525 56 0 0 0 0 0 525 525 525 0 0 0 0 0 525 525 525
22916 22,916 11,458 1 15,796 15,856 60 1 Early Drop-out High/Medium achievement 0 0 1 0 0 2,417 2,417 60 0 0 0 0 0 2,417 2,417 2,417 0 0 0 0 0 2,417 2,417 2,417
a Note= Proportions from the initial state
invisible("No se si debiera transformarlo a años. Tal vez a meses. Si lo transformo, me darán esas extrapolaciones bizarras del artículo anterior")


ms_d_match_surv <- msprep(time = c(NA, "TD_time", "DWCA_time", "Readmission_time", "TD2_time", "DWCA2_time", "Readmission2_time", "TD3_time", "DWCA3_time", "Readmission3_time"), 
                  status = c(NA, "TD_status", "DWCA_status", "Readmission_status", "TD2_status", "DWCA2_status", "Readmission2_status", "TD3_status", "DWCA3_status", "Readmission3_status"), 
                                            data = d_match_surv_msprep,
                                            id = "id",
                                            trans = trans_matrix,
                                            keep =  c("tipo_de_plan_res_1","tipo_de_plan_res_2", "tipo_de_plan_res_3","min_achievement_1","min_achievement_2","min_achievement_3"))

#From starting state 1, subject 66 74 19717 has smallest transition time with status=0
#Everyne has an infinite number in the transition. A good exmple is the user 19717. Only experienced a therapeutic discharge, but in the time from readmission it starts on 910 but ends in INf
#Starting from state 1, simultaneous transitions possible for subjects 36666 36586 56465 136847 37595 60609 51706 76376 117544 140210 at times 126 472 32 36 1 203 45 14 5 71; smallest receiving state chosen
invisible(c("This problem responds to differences between treatments 0. Should be resolved in the initial stages"))
if(no_mostrar==1){
  d_match_surv_msprep %>% 
    dplyr::filter(id %in% unlist(
       ms_d_match_surv%>% 
        dplyr::filter(Tstop<=Tstart) %>% 
        dplyr::select(id,from,to,trans,Tstart,Tstop,time,status) %>% 
        distinct(id))) %>%
    #dplyr::mutate(diff_bet_treat=fech_ing_next_treat-fech_egres_num)%>% 
    View()
}

if(no_mostrar==1){
d_match_surv %>% 
    dplyr::rename("id"="row") %>% 
    dplyr::filter(id %in% unlist(
        ms2_CONS_C1_SEP_2020_women_imputed %>% 
            dplyr::filter(Tstop<=Tstart) %>% 
            dplyr::select(id,from,to,trans,Tstart,Tstop,time,status) %>% 
            distinct(id))) %>%
    dplyr::select(id, motivodeegreso_mod_imp, contains("fech"))
}
path<-rstudioapi::getSourceEditorContext()$path
if (grepl("CISS Fondecyt",path)==T){
    dta_path<-paste0("C:/Users/CISS Fondecyt/OneDrive/Escritorio/SUD_CL/")
  } else if (grepl("andre",path)==T){
    dta_path<-paste0('C:/Users/andre/Desktop/SUD_CL/')
  } else if (grepl("E:",path)==T){
    dta_path<-paste0("E:/Mi unidad/Alvacast/SISTRAT 2019 (github)/")
  } else {
    dta_path<-paste0("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/")
  }

rio::export(
d_match_surv_msprep %>% 
      dplyr::select(
      id, group_match,TD_status, DWCA_status, Readmission_status, TD2_status, DWCA2_status, Readmission2_status, TD3_status, DWCA3_status, Readmission3_status,
      TD_time, DWCA_time, Readmission_time, TD2_time, DWCA2_time, Readmission2_time, TD3_time, DWCA3_time, Readmission3_time,
      tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3,min_achievement_1,min_achievement_2,min_achievement_3), 
  #dplyr::rename("id"="row", "ther_disch_time"="date_ther_disch","ther_disch_status"="ther_disch",
  #             "readmission_time"="date_post_treat","readmission_status"="readmission"),
paste0(dta_path,"ten_st_msprep.dta"))

rio::export(
d_match_surv_msprep %>% 
  rename_with(~ c("group.match","TD.status", "DWCA.status", "Readmission.status", "TD2.status", "DWCA2.status", "Readmission2.status", "TD3.status", "DWCA3.status", "Readmission3.status",
      "TD.time", "DWCA.time", "Readmission.time", "TD2.time", "DWCA2.time", "Readmission2.time", "TD3.time", "DWCA3.time", "Readmission3.time"), c("group_match","TD_status", "DWCA_status", "Readmission_status", "TD2_status", "DWCA2_status", "Readmission2_status", "TD3_status", "DWCA3_status", "Readmission3_status",
      "TD_time", "DWCA_time", "Readmission_time", "TD2_time", "DWCA2_time", "Readmission2_time", "TD3_time", "DWCA3_time", "Readmission3_time")) %>% 
      dplyr::select(
      id, group.match,TD.status, DWCA.status, Readmission.status, TD2.status, DWCA2.status, Readmission2.status, TD3.status, DWCA3.status, Readmission3.status,
      TD.time, DWCA.time, Readmission.time, TD2.time, DWCA2.time, Readmission2.time, TD3.time, DWCA3.time, Readmission3.time,
      tipo_de_plan_res_1,tipo_de_plan_res_2, tipo_de_plan_res_3,min_achievement_1,min_achievement_2,min_achievement_3), 
  #dplyr::rename("id"="row", "ther_disch_time"="date_ther_disch","ther_disch_status"="ther_disch",
  #             "readmission_time"="date_post_treat","readmission_status"="readmission"),
paste0(dta_path,"ten_st_msprep.csv"))


data.frame(events(ms_d_match_surv)$Frequencies) %>% 
    dplyr::filter(to!="total entering") %>% 
    left_join(data.frame(events(ms_d_match_surv)$Proportions), by=c("from", "to")) %>% 
    dplyr::rename("Frequencies"="Freq.x", "Proportions"="Freq.y") %>% 
    dplyr::arrange(from, to) %>% 
    dplyr::mutate(diff=ifelse(as.character(from)!=as.character(to),0,1)) %>% 
    dplyr::filter(diff==0) %>%
    dplyr::select(-diff) %>% 
    dplyr::mutate(comb=paste0(from,"_",to)) %>% 
    dplyr::filter(comb %in% c("Admission_TD","Admission_DWCA", "Admission_Readmission", "TD_Readmission", "DWCA_Readmission", "Readmission_TD2", "Readmission_DWCA2", "Readmission_Readmission2","TD2_Readmission2", "DWCA2_Readmission2", "Readmission2_TD3","Readmission2_DWCA3","Readmission2_Readmission3","TD3_Readmission3", "DWCA3_Readmission3")) %>% 
    dplyr::select(-comb) %>% 
    dplyr::mutate(Proportions=scales::percent(Proportions)) %>% 
    knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 9. Empirical State Transition Matrix, Ten States Model",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>% 
  kableExtra::add_footnote("Note. No event describes cases that remained in the state. Percentage depicts the proportion of the state of origin.") %>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 9. Empirical State Transition Matrix, Ten States Model
from to Frequencies Proportions
Admission TD 5,542 24.18%
Admission DWCA 13,229 57.73%
Admission Readmission 804 3.51%
TD Readmission 1,538 27.75%
DWCA Readmission 4,056 30.66%
Readmission TD2 1,418 22.16%
Readmission DWCA2 3,669 57.35%
Readmission Readmission2 290 4.53%
TD2 Readmission2 429 30.25%
DWCA2 Readmission2 1,292 35.21%
Readmission2 TD3 395 19.64%
Readmission2 DWCA3 1,128 56.09%
Readmission2 Readmission3 109 5.42%
TD3 Readmission3 126 31.90%
DWCA3 Readmission3 412 36.52%
a Note. No event describes cases that remained in the state. Percentage depicts the proportion of the state of origin.


Consideration of the Appropriateness of the proportional hazards assumption

Continuous variables need to be categorized into groups. The plot described is also known as the log(−log(survival)) plot, as the cumulative hazard is equal to the negative logarithm of the survival proportion. This approach requires a subjective assessment (Bradburn, Clark, Love, et al., 2003).

#Bradburn, M., Clark, T., Love, S. et al. Survival Analysis Part III: Multivariate data analysis – choosing a model and assessing its adequacy and fit. Br J Cancer 89, 605–611 (2003). https://doi.org/10.1038/sj.bjc.6601120

plots<- data.frame(title=rep(
  c("Admission to TD","Admission to DWCA", "Admission to Readmission", "TD to Readmission", "DWCA to Readmission", "Readmission to TD2", "Readmission to DWCA2", "Readmission to Readmission2","TD2 to Readmission2", "DWCA2 to Readmission2", "Readmission2 to TD3","Readmission2 to DWCA3","Readmission2 to Readmission3","TD3 to Readmission3", "DWCA3 to Readmission3"),1),trans=rep(1:max(trans_matrix,na.rm=T),1))

## SIN COVARIABLES
layout(matrix(1:15, nc = 3, byrow = F))
for(i in c(1:max(trans_matrix,na.rm=T))){
plot(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time), 
     log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv)), type="l",
     xlab="log(Days)", ylab="", xaxs="i",yaxs="i",
     las=1,cex.lab=.5, cex.axis=.5)
lines(log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time), 
      log(-log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv)), lty=2)
legend(0,-4, c("OUT", "RES"), bty="n", lty=c(2,1), cex=.5)
title(main=paste0(plots[i,"title"]), cex.main=.8)
}
Figure 18a. LOG CUMULATIVE HAZARD VS LOG TIME PLOT (w/o covars)

Figure 18a. LOG CUMULATIVE HAZARD VS LOG TIME PLOT (w/o covars)

layout(matrix(1:15, nc = 3, byrow = F))

for(i in c(1:max(trans_matrix,na.rm=T))){
plot(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$time, 
     -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==0))$surv), type="l",
     xlab="Days", ylab="", xaxs="i",yaxs="i", 
     las=1,cex.lab=.5, cex.axis=.5, col=1)
lines(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$time, 
      -log(survfit(Surv(time,status)~1, data=subset(ms_d_match_surv, trans==plots[i,"trans"] & tipo_de_plan_res_1==1))$surv), lty=2)
legend(6,.1, c("OUT", "RES"), bty="n", lty=c(2,1), cex=.5)
title(main=paste0(plots[i,"title"]), cex.main=.8)
}
Figure 18b. CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) (w/o covars)

Figure 18b. CUMULATIVE HAZARD PLOT: -LOG(KM SURVIVAL) (w/o covars)

As seen in both Figures above, the cumulative hazards does not follow a proportional trend in the transitions from Admission to DWCA, Admission to TD, TD to Readmission, DWCA to Readmission, and Readmission to Second Readmission.


Decision whether to use Markov or Semi-Markov


#state arrival extended (semi-)Markov to mean that the i → j transition hazard depends on thetime of arrival at state i. 

#Build a Cox proportional hazard model including treatment and time in previous state as covariates

tab_cox_markov<- data.frame()
for (i in c(4:max(trans_matrix,na.rm=T))){
coxph(Surv(Tstart,Tstop,status)~factor(tipo_de_plan_res_1)+Tstart,
                  data=subset(ms_d_match_surv, trans==i),method = "breslow") %>% 
    assign(paste0("CoxMarkov",i),.,envir=.GlobalEnv)
  round(exp(coef(get(paste0("CoxMarkov",i)))),2)%>% assign(paste0("HR",i),.,envir=.GlobalEnv)
  round(exp(confint(get(paste0("CoxMarkov",i)))),2)%>% assign(paste0("CI",i),.,envir=.GlobalEnv)
  round(coef(summary(get(paste0("CoxMarkov",i))))[,5],4)%>% assign(paste0("P",i),.,envir=.GlobalEnv)
  data.frame(get(paste0("CI",i))) %>% dplyr::rename("Lower 95% CI"="X2.5..","Upper 95% CI"="X97.5..")%>% assign(paste0("CI",i),.,envir=.GlobalEnv)
  tab_cox_markov_add<- cbind.data.frame(plots[i, "title"],get(paste0("HR",i)),get(paste0("CI",i)),get(paste0("P",i)))
  tab_cox_markov<-rbind.data.frame(tab_cox_markov,tab_cox_markov_add)
}

tab_cox_markov %>% 
  data.table(keep.rownames=T) %>% 
  dplyr::rename("Terms"="rn", "Transition"="plots[i, \"title\"]",
                "HR"="get(paste0(\"HR\", i))","P"="get(paste0(\"P\", i))") %>% 
  dplyr::mutate(Terms=dplyr::case_when(grepl("tipo_de_", Terms)~"Type of Plan (Residential)",
                                    grepl("Tstart",Terms)~"Time in previous state(in days)")) %>% 
  dplyr::mutate(P=ifelse(P<.001,"<.001",sprintf("%1.3f",P))) %>% 
  dplyr::rename("Sig."="P") %>% 
  dplyr::mutate(`95% CIs`=paste0(sprintf("%2.2f",`Lower 95% CI`),", ",sprintf("%2.2f",`Upper 95% CI`))) %>% 
  dplyr::select(-`Lower 95% CI`,-`Upper 95% CI`) %>% 
  dplyr::select(Transition, Terms, HR, `95% CIs`, Sig.) %>% 
      knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 10. PH Model incluiding time in previous state & Type of Program as a covariate",
               align= c("c",rep('c', 5)))%>%
  #kableExtra::pack_rows("Three-states", 1, 2) %>% 
  #kableExtra::pack_rows("Four-states", 3, 4) %>% 
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11)%>% 
  kableExtra::scroll_box(width = "100%", height = "350px")
Table 10. PH Model incluiding time in previous state & Type of Program as a covariate
Transition Terms HR 95% CIs Sig.
TD to Readmission Type of Plan (Residential) 1.75 1.56, 1.96 <.001
TD to Readmission Time in previous state(in days) 1.00 1.00, 1.00 <.001
DWCA to Readmission Type of Plan (Residential) 1.58 1.49, 1.69 <.001
DWCA to Readmission Time in previous state(in days) 1.00 1.00, 1.00 <.001
Readmission to TD2 Type of Plan (Residential) 1.18 1.06, 1.31 0.003
Readmission to TD2 Time in previous state(in days) 1.00 1.00, 1.00 <.001
Readmission to DWCA2 Type of Plan (Residential) 0.96 0.90, 1.03 0.264
Readmission to DWCA2 Time in previous state(in days) 1.00 1.00, 1.00 <.001
Readmission to Readmission2 Type of Plan (Residential) 0.86 0.68, 1.09 0.207
Readmission to Readmission2 Time in previous state(in days) 1.00 1.00, 1.00 <.001
TD2 to Readmission2 Type of Plan (Residential) 1.05 0.86, 1.28 0.623
TD2 to Readmission2 Time in previous state(in days) 1.00 1.00, 1.00 <.001
DWCA2 to Readmission2 Type of Plan (Residential) 1.04 0.93, 1.16 0.476
DWCA2 to Readmission2 Time in previous state(in days) 1.00 1.00, 1.00 <.001
Readmission2 to TD3 Type of Plan (Residential) 1.19 0.96, 1.46 0.105
Readmission2 to TD3 Time in previous state(in days) 1.00 1.00, 1.00 <.001
Readmission2 to DWCA3 Type of Plan (Residential) 0.99 0.88, 1.12 0.884
Readmission2 to DWCA3 Time in previous state(in days) 1.00 1.00, 1.00 <.001
Readmission2 to Readmission3 Type of Plan (Residential) 1.07 0.72, 1.57 0.748
Readmission2 to Readmission3 Time in previous state(in days) 1.00 1.00, 1.00 <.001
TD3 to Readmission3 Type of Plan (Residential) 0.87 0.60, 1.25 0.452
TD3 to Readmission3 Time in previous state(in days) 1.00 1.00, 1.00 <.001
DWCA3 to Readmission3 Type of Plan (Residential) 1.00 0.82, 1.22 0.970
DWCA3 to Readmission3 Time in previous state(in days) 1.00 1.00, 1.00 <.001
#a variable appears on both the left and right sides of the formula
#this warning should be normal, since we are dealing with time to arrival at a determined state.
columna_dummy <- function(df, columna) {
  df %>% 
  mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>% 
    mutate(valor = 1) %>% 
    spread(key = columna, value = valor, fill = 0)
}

catVars<-c("min_achievement_1","min_achievement_2",   "min_achievement_3")


ms_d_match_surv_mod<-ms_d_match_surv
for (i in c(1:length(catVars))){#catVars[-10] excluding treatment indicator
  cat<-as.character(catVars[i])#catVars[-10] excluding treatment indicator
  ms_d_match_surv_mod<-columna_dummy(ms_d_match_surv_mod,cat)
}

colnames(ms_CONS_C1_SEP_2020_women_imputed_mod)[8:length(ms_CONS_C1_SEP_2020_women_imputed_mod)]<-
  colnames(janitor::clean_names(ms_CONS_C1_SEP_2020_women_imputed_mod))[8:length(ms_CONS_C1_SEP_2020_women_imputed_mod)]

attr(ms_CONS_C1_SEP_2020_women_imputed_mod,"trans")<-mat_3_states

#data: dataset in etm format: "entry", "exit", "from", "to", "id". Should also contain the relevant covariates: no factors allowed
#Multi-state data in msdata format. Should also contain (dummy codings of) the relevant covariates; no factors allowed  
#  ms_CONS_C1_SEP_2020_women_imputed$id<-ms_CONS_C1_SEP_2020_women_imputed$row

ms_CONS_C1_SEP_2020_women_imputed_mod$id<-ms_CONS_C1_SEP_2020_women_imputed_mod$row
ms_CONS_C1_SEP_2020_women_imputed_mod$row<-NULL
formula_char<-
"edad_al_ing_grupos_18_29+ edad_al_ing_grupos_30_39+ 
edad_al_ing_grupos_40_49+ edad_al_ing_grupos_50+ escolaridad_rec_1_more_than_high_school+
escolaridad_rec_2_completed_high_school_or_less+ escolaridad_rec_3_completed_primary_school_or_less+
sus_principal_mod_alcohol+ sus_principal_mod_cocaine_hydrochloride+ sus_principal_mod_cocaine_paste+
sus_principal_mod_marijuana+ sus_principal_mod_other+ freq_cons_sus_prin_1_day_a_week_or_more+
freq_cons_sus_prin_2_to_3_days_a_week+ freq_cons_sus_prin_4_to_6_days_a_week+
freq_cons_sus_prin_daily+ freq_cons_sus_prin_less_than_1_day_a_week+
compromiso_biopsicosocial_1_mild+ compromiso_biopsicosocial_2_moderate+
compromiso_biopsicosocial_3_severe+ tenencia_de_la_vivienda_mod_illegal_settlement+
tenencia_de_la_vivienda_mod_others+ tenencia_de_la_vivienda_mod_owner_transferred_dwellings_pays_dividends+
tenencia_de_la_vivienda_mod_renting+ tenencia_de_la_vivienda_mod_stays_temporarily_with_a_relative+
num_otras_sus_mod_more_than_one_additional_substance+ num_otras_sus_mod_no_additional_substance+
num_otras_sus_mod_one_additional_substance+ numero_de_hijos_mod_rec_no+
numero_de_hijos_mod_rec_yes+ tipo_de_programa_2_1+
tipo_de_programa_2_0+ tipo_de_plan_res_outpatient+
tipo_de_plan_res_residential
"
MT <- MarkovTest(ms_d_match_surv, 
                 formula= "tipo_de_plan_res_1",
                 transition = 4,
                 grid = 1,#seq(0, 11, by = 1/12), 
                 B = 25)
#Tried with transition 2 and 3
#Error in rep(mmm, length.out = l1) : 
#  attempt to replicate an object of type 'symbol'
#Además: There were 50 or more warnings (use warnings() to see the first 50)

data<-ms_d_match_surv #no puede ir arrival
id<-"id"
transition<-3
grid<-90 #3 months
grid<-1096 #3 years
dist<-"poisson"
trans=ifelse(is.null(attr(data, "trans")),get("mat_3_states"),attr(data, "trans"))
fn = list(function(x) mean(abs(x), na.rm = TRUE))
fn2 = list(function(x) mean(x, na.rm = TRUE))
formula<-formula_char
B=25

MarkovTest <- function(data, id, formula = NULL, transition, grid,
                       trans=NULL,
                       B = 1000,
                       fn = list(function(x) mean(abs(x), na.rm = TRUE)),
                       fn2 = list(function(x) mean(x, na.rm = TRUE)),
                       min_time = 0,
                       other_weights = NULL,
                       dist = c("poisson", "normal")) {
  
  dist <- match.arg(dist)
  if (missing(id)) id <- "id"
  # Remove "empty" lines in the data
  wh <- which(data$Tstop <= data$Tstart)
  if (length(wh)>0)
  {
    warning(length(wh), " lines with Tstart <= Tstop, have been removed before applying tests!")
    data <- data[-wh, ]
  }

  # Convert data to etm data
  # Make sure to retain all covariates (possibly way to many) in msdata (needed in formula perhaps)
  mtch <- match(c("id", "from", "to", "trans", "Tstart", "Tstop", "status"), names(data)) 
  covcols <- 1:ncol(data)
  covcols <- covcols[!covcols %in% mtch]
  ncovs <- length(covcols)
  
  trans <- get("mat_3_states")
  etmdata <- msdata2etm(data, id)
  if (ncovs > 0) etmdata <- msdata2etm(data, id, names(data)[covcols])
  trans2 <- to.trans2(trans)
  tfrom <- trans2$from[trans2$transno == transition]
  tto <- trans2$to[trans2$transno == transition]
  
  # Determine qualifying set
  qualset <- c(tfrom, which(trans2Q(trans)[, tfrom] > 0))
  qualset <- sort(unique(qualset))  # for circular models, tfrom is included twice

  # Functions
  if (!is.list(fn)) 
    fn <- list(fn)  # coerce to be list if a single function is provided
  if (is.list(fn) & is.function(fn[[1]])) {
    # coerce to be a list of lists, by repeating the same list each time
    tempfn <- list()
    for (i in 1:length(qualset)) tempfn[[i]] <- fn
    fn <- tempfn
  }
  if (!is.list(fn2)) 
    fn2 <- list(fn2)  # coerce to be list if a single function is provided
  
  # Establish the relevant patients who ever enter tfrom
  relpat <- sort(unique(etmdata$id[etmdata$from == tfrom]))
  
  rdata <- etmdata[etmdata$from == tfrom, ]  # only need time periods in the relevant state...
  rdata$status <- 1 * (rdata$to == tto)
  if (!is.null(formula)) {
    form <- as.formula(paste("Surv(entry, exit, status) ~ ", formula, 
                             sep = ""))
    progfit <- coxph(form, data = rdata)
    if (length(progfit$coefficients) > 0) {
      #Sacado por andrés
      Zmat <- as.matrix(rdata[, match(names(progfit$coefficients), 
                                      names(rdata))])
      #Zmat <- as.matrix(rdata[, 7:44])
      Ncov <- dim(Zmat)[2]
    } else {
      Ncov <- 0
    }
    if (!is.null(progfit$offset)) {
      offset <- progfit$offset
    } else {
      offset <- rep(0, dim(rdata)[1])
    }
  } else {
    Ncov <- 0
    offset <- rep(0, dim(rdata)[1])
    progfit <- NULL
  }
  
  # Minimal data, change names
  progdat <- rdata[, match(c("id", "entry", "exit", "status"), names(rdata))]
  names(progdat) <- c("id", "T0", "T1", "D")
  
  nobs_grid <- sapply(grid, function(x) sum(progdat$D[progdat$T1 > x]))
  
  # Have the extra dimension of indexes
  index_gM <- array(0, c(length(relpat), length(grid), length(qualset)))
  for (indx in 1:length(qualset)) {
    qualstate <- qualset[indx]
    index_g <- sapply(grid, function(y) sapply(relpat, function(x)
      which(etmdata$entry < y & etmdata$exit >= y & etmdata$id == x)))
    index_g <- array(1 * (etmdata$from[sapply(index_g, function(y)
      ifelse(length(y) > 0, y, 
             dim(etmdata)[1] + 1))] == qualstate), c(length(relpat), length(grid)))
    index_g[is.na(index_g)] <- 0
    index_gM[, , indx] <- index_g
  }

  # Need a separate Z3mat for each group as well...
  Z3mat <- index_gM[match(progdat$id, relpat), , , drop = FALSE]
  
  N1 <- dim(progdat)[1]
  
  if (Ncov > 0) {
    LP <- c(Zmat %*% progfit$coefficients) + offset
  } else {
    LP <- rep(0, N1) + offset
  }
  S0 <- sapply(1:N1, function(x) sum(exp(LP) * (progdat$T0 < progdat$T1[x] & 
                                                  progdat$T1 >= progdat$T1[x])))
  
  incr <- progdat$D / S0
  cumhaz <- approxfun(c(0, sort(unique(progdat$T1)), Inf),
                      c(0, cumsum(tapply(incr, progdat$T1, sum)), sum(incr)),
                      method = "constant")
  resid_mat <- sapply(grid, function(x) progdat$D * (progdat$T1 > x) - exp(LP) *
                        (cumhaz(pmax(x, progdat$T1)) - cumhaz(pmax(x, progdat$T0))))
  
  # Have a separate trace for each qualifying state...
  obs_trace <- array(0, c(length(grid), length(qualset)))
  for (indx in 1:(length(qualset))) {
    obs_trace[, indx] <- sapply(1:length(grid), function(k)
      sum(resid_mat[, k] * Z3mat[, k, indx] * (progdat$T1 > grid[k])))
  }
  
  nqstate <- length(qualset)
  if (Ncov > 0) 
    Ifish <- progfit$var
  
  N1 <- dim(progdat)[1]
  if (Ncov > 0) 
    Zbar0 <- array(0, c(N1, Ncov))
  
  Zbar <- array(0, c(N1, length(grid), nqstate))
  for (i in 1:N1) {
    x <- i
    if (Ncov > 0) {
      for (j in 1:Ncov) {
        Zbar0[i, j] <- sum(Zmat[, j] * exp(LP) *
                             (progdat$T0 < progdat$T1[x] & progdat$T1 >= progdat$T1[x])) /
          sum(exp(LP) * (progdat$T0 < progdat$T1[x] & progdat$T1 >= progdat$T1[x]))
      }
    }
    for (j in 1:length(grid)) {
      for (k in 1:nqstate)
        Zbar[i, j, k] <- sum(Z3mat[, j, k] * exp(LP) * 
                               (progdat$T0 < progdat$T1[x] & progdat$T1 >= progdat$T1[x])) /
          sum(exp(LP) * (progdat$T0 < progdat$T1[x] & progdat$T1 >= progdat$T1[x]))
    }
  }
  
  NAe <- incr
  
  if (Ncov > 0) {
    Hmat <- array(0, c(length(grid), Ncov, nqstate))
    for (j in 1:Ncov) {
      # for (k in 1:nqstate) Hmat[,j,k] <- sapply(1:length(grid),function(y)
      # sum(sapply(1:N1,function(x) sum(exp(LP) *Zmat[,j]* (Z3mat[x,y,k] -
      # Zbar[x,y,k]) * NAe[x] * (progdat$T0[x] > grid[y] & progdat$T1[x] <=
      # progdat$T1)))))
      for (k in 1:nqstate) Hmat[, j, k] <- sapply(1:length(grid), function(y) 
        sum(sapply(1:N1, function(x)
          sum(exp(LP[x]) * ((Zmat[x, j] - Zbar0[, j]) *
                              (Z3mat[x, y, k] - Zbar[, y, k])) * NAe *
                (progdat$T1[x] > grid[y]) * (progdat$T1 > progdat$T0[x] & progdat$T1 <= progdat$T1[x])))))
    }
  }
  
  if (Ncov > 0) {
    multiplier <- array(0, dim(Hmat))
    for (k in 1:nqstate) multiplier[, , k] <- Hmat[, , k] %*% Ifish
    est_cov <- array(0, c(length(grid), nqstate, nqstate))
    for (indx1 in 1:nqstate) {
      for (indx2 in (indx1):nqstate) {
        est_var <- sapply(1:length(grid), function(k)
          sum(sapply(1:N1, function(v) 
            sum(((Z3mat[v, k, indx1] - Zbar[, k, indx1]) * 
                   (progdat$T1 > grid[k]) - c(multiplier[k, , indx1, drop = FALSE] %*%
                                                t(Zmat[v, ] - Zbar0))) *
                  ((Z3mat[v, k, indx2] - Zbar[, k, indx1]) * (progdat$T1 > grid[k]) - 
                     c(multiplier[k, , indx2, drop = FALSE] %*% t(Zmat[v, ] - Zbar0))) *
                  exp(LP[v]) * (progdat$T0[v] < progdat$T1 & progdat$T1[v] >= progdat$T1) * NAe))))
        est_cov[, indx1, indx2] <- est_cov[, indx2, indx1] <- est_var
      }
    }
    
  } else {
    est_cov <- array(0, c(length(grid), nqstate, nqstate))
    for (indx1 in 1:nqstate) {
      for (indx2 in (indx1):nqstate) {
        est_var <- sapply(1:length(grid), function(k)
          sum(sapply(1:N1, function(v)
            sum((Z3mat[v, k, indx1] - Zbar[, k, indx1]) * (Z3mat[v, k, indx2] - Zbar[, k, indx2]) *
                  exp(LP[v]) * (progdat$T1 > grid[k] & progdat$T0[v] < progdat$T1 & progdat$T1[v] >= progdat$T1) * NAe))))
        est_cov[, indx1, indx2] <- est_cov[, indx2, indx1] <- est_var
      }
    }
  }
  
  # First obtain the individually normalized traces...
  est_var <- obs_norm_trace <- array(0, c(length(grid), nqstate))
  for (k in 1:nqstate) {
    est_var[, k] <- est_cov[cbind(1:length(grid), k, k)]
    # This should be the same as before...
    obs_norm_trace[, k] <- obs_trace[, k] / sqrt(est_var[, k] + 1 * (est_var[, k] == 0))
  }
  # Find singular matrices
  obs_chisq_trace <- rep(0, length(grid))
  for (k in 1:length(grid)) {
    sol <- tryCatch(solve(est_cov[k, -1, -1]), error = function(e)
      return(diag(0, nqstate - 1)))
    obs_chisq_trace[k] <- (obs_trace[k, -1]) %*% sol %*%
      (obs_trace[k, -1]) # do something about singular matrices...
  }
  
  ############## 
  
  n_wb_trace <- wb_trace0 <- wb_trace <- array(0, c(B, length(grid), nqstate))
  nch_wb_trace <- array(0, c(B, length(grid)))
  for (wb in 1:B) {
    if (dist == "poisson") {
      G <- rpois(dim(progdat)[1], 1) - 1
    } else if (dist == "normal") {
      G <- rnorm(dim(progdat)[1], 0, 1)
    } else stop("argument dist should be poisson or normal")
    trace0 <- array(0, c(length(grid), nqstate))
    for (k in 1:nqstate) {
      trace0[, k] <- apply(sapply(1:length(grid), function(x)
        progdat$D * (Z3mat[, x, k] - Zbar[, x, k]) * (progdat$T1 > grid[x]) * G), 2, sum)
      if (Ncov > 0) {
        Imul <- sapply(1:Ncov, function(x)
          sum(progdat$D * (Zmat[, x] - Zbar0[, x]) * G))
        trace1 <- (Hmat[, , k] %*% Ifish %*% Imul)[, 1]
      } else {
        trace1 <- 0
      }
      wb_trace[wb, , k] <- trace0[, k] - trace1
      n_wb_trace[wb, , k] <- wb_trace[wb, , k]/sqrt(est_var[, k] + 
                                                      1 * (est_var[, k] == 0))
      for (w in 1:length(grid)) {
        sol <- tryCatch(solve(est_cov[w, -1, -1]), error = function(e)
          return(diag(0, nqstate - 1)))
        nch_wb_trace[wb, w] <- (wb_trace[wb, w, -1]) %*% sol %*% 
          (wb_trace[wb, w, -1]) # do something about singular matrices...
      }
    }
  }
  
  # Need to have one of these per nqstate
  NS <- length(fn[[1]])
  
  orig_stat <- array(sapply(1:nqstate, function(y)
    sapply(fn[[y]], function(g) g(obs_norm_trace[, y]))), c(NS, nqstate))
  orig_ch_stat <- sapply(fn2, function(g) g(obs_chisq_trace))
  
  p_stat_wb <- array(0, c(NS, nqstate))
  wb_stat <- array(0, c(B, NS, nqstate))
  for (k in 1:nqstate) {
    wb_stat[, , k] <- array(t(apply(n_wb_trace[, , k, drop = FALSE], 
                                    1, function(x)
                                      sapply(fn[[k]], function(g) g(x)))), c(B, NS))
    p_stat_wb[, k] <- sapply(1:NS, function(x) mean(wb_stat[, x, k] > orig_stat[x, k]))
  }
  est_quant <- array(0, c(2, length(grid), nqstate))
  for (k in 1:nqstate)
    est_quant[, , k] <- apply(n_wb_trace[, , k, drop = FALSE], 2,
                              quantile, c(0.025, 0.975), na.rm = TRUE)
  NS2 <- length(fn2)
  p_ch_stat_wb <- rep(0, NS2)
  wb_ch_stat <- array(t(apply(nch_wb_trace, 1, function(x)
    sapply(fn2, function(g) g(x)))), c(B, NS2))
  p_ch_stat_wb <- sapply(1:NS2, function(x) mean(wb_ch_stat[, x] > orig_ch_stat[x]))
  # Is a question whether should use Nsub as number of subjects or number
  # of spells within the state
  
  MTres <- list(orig_stat = orig_stat, orig_ch_stat = orig_ch_stat, p_stat_wb = p_stat_wb, 
                p_ch_stat_wb = p_ch_stat_wb, b_stat_wb = wb_stat, zbar = obs_norm_trace, 
                nobs_grid = nobs_grid, Nsub = length(relpat), est_quant = est_quant, 
                obs_chisq_trace = obs_chisq_trace, nch_wb_trace = nch_wb_trace, 
                n_wb_trace = n_wb_trace, est_cov = est_cov, transition = transition,
                from = tfrom, to = tto, B = B, dist = dist,
                qualset = qualset, coxfit = progfit, fn = fn, fn2 = fn2)
  
  class(MTres) <- c("MarkovTest")
  return(MTres)
}

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#' @export
optimal_weights_multiple <- function(data, id, grid, transition, min_time = 0)
{
  # Convert data to etm data
  trans <- attr(data, "trans")
  etmdata <- msdata2etm(data, id)
  trans2 <- to.trans2(trans)
  from <- trans2$from[trans2$transno == transition]
  to <- trans2$to[trans2$transno == transition]

  numbers <- sapply(grid, function(x)
    table(factor(etmdata$from)[(etmdata$entry <= x & etmdata$exit > x)]))
  subevent <- sapply(grid, function(x)
    sum(etmdata$from == from & etmdata$to == to & etmdata$exit > x))
  tnumbers <- apply(numbers, 2, sum)
  weights <- sapply(1:dim(numbers)[1], function(x)
    subevent * numbers[x, ] * (tnumbers - numbers[x, ])/tnumbers^2)
  weights[is.nan(weights)] <- 0
  weight <- apply(weights, 1, max)
  weight * diff(c(min_time, grid))
}

#' @export
optimal_weights_matrix <- function(data, id, grid, transition, min_time = 0, 
                                   other_weights = NULL)
{
  # Convert data to etm data
  trans <- attr(data, "trans")
  etmdata <- msdata2etm(data, id)
  trans2 <- to.trans2(trans)
  from <- trans2$from[trans2$transno == transition]
  to <- trans2$to[trans2$transno == transition]

  numbers <- sapply(grid, function(x)
    table(factor(etmdata$from)[(etmdata$entry <= x & etmdata$exit > x)]))
  subevent <- sapply(grid, function(x)
    sum(etmdata$from == from & etmdata$to == to & etmdata$exit > x))
  tnumbers <- apply(numbers, 2, sum)
  weights <- sapply(1:dim(numbers)[1], function(x)
    sqrt(subevent * numbers[x, ] * (tnumbers - numbers[x, ]))/tnumbers)
  weights[is.nan(weights)] <- 0
  fn_list <- list()
  for (i in 1:dim(numbers)[1]) {
    # Take into account the distance between grids
    val <- weights[, i] * diff(c(min_time, grid))
    fn_list[[i]] <- list(fn = function(x)
      weighted.mean(abs(x), w = val, na.rm = TRUE))
    if (!is.null(other_weights)) {
      nother <- length(other_weights)
      fn_list[[i]][2:(nother + 1)] <- other_weights
    }
  }
  # Store the weights as an attribute
  attr(fn_list, "weights") <- weights
  fn_list
}

The model considered the transition from intermediate states to our absorbing state (being readmitted) is explained by the time spent in the previous health state. This covariate (time in the previous state) was shown to be statistically significant (p<.001); results indicated a longer duration spent in the first treatment is associated with increased risk of readmission. Therefore, a semi-Markov (called a Markov renewal model) or clock reset approach should be undertaken for both models. They bear the advantage that information from the process history can be included as transition-specific explanatory covariates.


#ms_d_match_surv[, c("Tstart", "Tstop", "time")] <- ms_d_match_surv[, c("Tstart", "Tstop", "time")]/30
ms_d_match_surv <- expand.covs(ms_d_match_surv, "arrival", append = TRUE, longnames =F)
## Error in eval(predvars, data, env): objeto 'arrival' no encontrado
ms_d_match_surv$arrival<-ms_d_match_surv$Tstart


Assessment of Fit of Transitions

We need to derive appropriate functional forms and define respective survival functions. One reason to favor patametric models is that they can be easier to generalize. Several candidate distributions were considered to model transitions, including Weibull (assume a monotonically increasing or decreasing hazard), Log-logistic (non-monotonic hazards), Gompertz (assume a monotonically increasing or decreasing hazard), Log-normal (non-monotonic hazards), Exponential (constant hazard), Gamma, Generalized gamma & Generalized F (more flexible).

The following plots fitted survival curves from each model (colored lines), with the Kaplan-Meier estimate, in red, obtained from an example of Jackson available here, added to the contributions of Wathers & Cutler available here.


#options(warn=-1)

n_iter<-10000

tiempo_antes_fits<-Sys.time()

#Weathers, Brandon and Cutler, Richard Dr., "Comparision of Survival Curves Between Cox Proportional 
#Hazards, Random Forests, and Conditional Inference Forests in Survival Analysis" (2017). All Graduate 
#Plan B and other Reports. 927. 
#https://digitalcommons.usu.edu/gradreports/927 

#<div style="border: 1px solid #ddd; padding: 5px; overflow-y: scroll; height:350px; overflow-x: scroll; width:100%">            
#https://devinincerti.com/2019/01/01/sim-mstate.html

n_trans <- max(trans_matrix, na.rm = TRUE)
fits_wei <- vector(mode = "list", length = n_trans)
fits_weiph <- vector(mode = "list", length = n_trans)
fits_llogis <- vector(mode = "list", length = n_trans)
fits_gomp <- vector(mode = "list", length = n_trans)
fits_logn <- vector(mode = "list", length = n_trans)
fits_exp <- vector(mode = "list", length = n_trans)
fits_gam <- vector(mode = "list", length = n_trans)
fits_ggam <- vector(mode = "list", length = n_trans)
fits_genf <- vector(mode = "list", length = n_trans)
fits_c_wei <- vector(mode = "list", length = n_trans)
fits_c_weiph <- vector(mode = "list", length = n_trans)
fits_c_llogis <- vector(mode = "list", length = n_trans)
fits_c_gomp <- vector(mode = "list", length = n_trans)
fits_c_logn <- vector(mode = "list", length = n_trans)
fits_c_exp <- vector(mode = "list", length = n_trans)
fits_c_gam <- vector(mode = "list", length = n_trans)
fits_c_ggam <- vector(mode = "list", length = n_trans)
fits_c_genf <- vector(mode = "list", length = n_trans)
km.lc<-list()
fits_cox<-list()
fits_c_cox<-list()
#"gengamma" Generalized gamma (stable parameterisation)
#"gengamma.orig"    Generalized gamma (original parameterisation)
#"genf" Generalized F (stable parameterisation)
#"genf.orig"    Generalized F (original parameterisation)
#"weibull"  Weibull
#"gamma"    Gamma
#"exp"  Exponential
#"lnorm"    Log-normal
#"gompertz" Gompertz

library(flexsurv)

#Specify the formula
fitform <- Surv(time, status) ~ 1

for (i in 1:n_trans){  
  fits_wei[[i]] <- flexsurvreg(formula=fitform,
                               data = subset(ms_d_match_surv, trans == i),
                               dist = "weibull")
}

for (i in 1:n_trans){  
  fits_weiph[[i]] <- flexsurvreg(formula=fitform,
                               data = subset(ms_d_match_surv, trans == i),
                               dist = "weibullph")
}

for (i in 1:n_trans){
  fits_llogis[[i]] <- flexsurvreg(formula=fitform,
                               data = subset(ms_d_match_surv, trans == i),
                               dist = "llogis")
}

for (i in 1:n_trans){
  fits_gam[[i]] <- flexsurvreg(formula=fitform,
                               data = subset(ms_d_match_surv, trans == i),
                               dist = "gamma")
}
#LOS ERRORES OCURREN CUANDO NO HAY COVARIABLES
#In (function (q, shape, rate = 1, scale = 1/rate, lower.tail = TRUE,  ... :   NaNs produced
#gamma no funcionó: NaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs producedNaNs produced
for (i in 1:n_trans){
  fits_ggam[[i]] <- flexsurvreg(formula=fitform,
                               data = subset(ms_d_match_surv, trans == i),
                               dist = "gengamma")
}

for (i in 1:n_trans){
  fits_gomp[[i]] <- flexsurvreg(formula=fitform,
                                data = subset(ms_d_match_surv, trans == i),
                                dist = "gompertz")
}
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927: valor inicial en 'vmmin' no es finito
for (i in 1:n_trans){
  fits_logn[[i]] <- flexsurvreg(formula=fitform,
                                data = subset(ms_d_match_surv, trans == i),
                                dist = "lnorm")
}

for (i in 1:n_trans){
  fits_exp[[i]] <- flexsurvreg(formula=fitform,
                               data = subset(ms_d_match_surv, trans == i),
                               dist = "exp")
}

for (i in 1:n_trans){
  fits_genf[[i]] <- flexsurvreg(formula=fitform,
                               data = subset(ms_d_match_surv, trans == i),
                               dist = "genf")
}
## Error in optim(method = "BFGS", par = c(mu = 5.08479340915689, sigma = 0.206848828575828, : non-finite finite-difference value [2]
for (i in 1:n_trans){
km.lc[[i]] <- survfit(formula= fitform, data = subset(ms_d_match_surv, trans == i))
}

transition_label<- plots$title
attr(transition_label,"names")<- plots$trans


layout(matrix(1:15, nc = 3, byrow = F))
for (i in 1:n_trans){
plot(km.lc[[i]], col="red", conf.int=F);
  lines(fits_wei[[i]], col="coral4", ci=F);
  lines(fits_weiph[[i]], col="navyblue", ci=F);
  lines(fits_gomp[[i]], col="lightpink", ci=F);
  lines(fits_llogis[[i]], col="gray25", ci=F);#A0A36D
  lines(fits_gam[[i]], col="darkorchid4", ci=F);
  lines(fits_ggam[[i]], col="#496A72", ci=F);
  lines(fits_logn[[i]], col="gray70", ci=F);
  lines(fits_exp[[i]],col="#A0A36D", ci=F);
  lines(fits_exp[[i]],col="aquamarine3", ci=F)
  
legend("bottomleft", legend = c("Kaplan-Meier","Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
                "Generalized gamma", "Lognormal", "Exponential", "Generalized F"), col = 
         c("red","coral4","navyblue","lightpink","gray25",#"#A0A36D","#886894",
           "darkorchid4","#496A72","gray70","#A0A36D", "aquamarine3"), 
       title = "Distributions", cex = .95, bty = "n", lty=1,lwd=3)# lty = 1:2, 
title(main=transition_label[[i]])
}
Figure 19. Vissual Assessment of Survival Curves

Figure 19. Vissual Assessment of Survival Curves

endTime <- Sys.time()

paste0("Time in process: ");endTime - tiempo_antes_fits
## [1] "Time in process: "
## Time difference of 1.240117 mins
#23 min aprox.

#For more complicated models, users should specify what covariate values they want summaries for, rather than relying on the default
#</div>
options(warn=0)

if(no_mostrar==1){
jpeg("eso54.jpg", height=15, width= 15, res= 96, units = "in")

layout(matrix(1:15, nc = 3, byrow = F))
for (i in 1:n_trans){
  plot(km.lc[[i]], col="red", conf.int=F, xlim=c(0,12));
    lines(fits_wei[[i]], col="coral4", ci=F);
    lines(fits_weiph[[i]], col="navyblue", ci=F);
    lines(fits_gomp[[i]], col="lightpink", ci=F);
    lines(fits_llogis[[i]], col="gray25", ci=F);#A0A36D
    lines(fits_gam[[i]], col="darkorchid4", ci=F);
    lines(fits_ggam[[i]], col="#496A72", ci=F);
    lines(fits_logn[[i]], col="gray70", ci=F);
    lines(fits_exp[[i]],col="#A0A36D", ci=F);
    lines(fits_exp[[i]],col="cadetblue", ci=F)
    
  legend("bottomleft", legend = c("Kaplan-Meier","Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
                  "Generalized gamma", "Lognormal", "Exponential", "Generalized F"), col = 
           c("red","coral4","navyblue","lightpink","gray25",#"#A0A36D","#886894",
             "darkorchid4","#496A72","gray70","#A0A36D", "cadetblue"), 
         title = "Distributions", cex = .95, bty = "n", lty=1,lwd=3)# lty = 1:2, 
  title(main=transition_label[[i]])
  }
  dev.off()
}

The following transitions deviated considerably from the models chosen. Consider specify transition parameters manually or check whether to transform the measure of time into an exponential or logarithmic time to event:

  • Readmission2 to DWCA3
  • Readmission2 to TD3
  • Readmission to DWCA2
  • Readmission to TD2
  • Admission to Readmission
  • Admission to DWCA
  • Admission to TD


#install.packages("survHE")
library(survHE)
#First, defines the vector of models to be used
mods <- c("weibull", "weibullph", "llogis", "gamma", "gengamma", "gompertz", "lnorm" ,"exp")#, "genf")

# And then runs the models using MLE via flexsurv
#m2 <- fit.models(formula = formula, data = data, distr = c("exp","wei","lno","llo"), method="inla")
#m3 <- fit.models(formula = formula, data = subset(ms_d_match_surv, trans == 1), distr = mods, method="hmc")
#Error in model.matrix(formula, data)[(mf %>% filter(event == 1))$ID, ] : 
#  subíndice fuera de  los límites

list_aics<-list()
for (i in 1:n_trans){
  m1 <- fit.models(formula = fitform, data = subset(ms_d_match_surv, trans == i), distr = mods)
  model.fit.plot(MLE=m1, stacked = T, #HMC=m3, 
               models = c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
                "Generalized gamma", "Lognormal", "Exponential", "Generalized F"),
               #lab.profile = c("Outpatient","Residential")
               name_legend = "Inferential method")+ 
  ggtitle("Model comparison based on AIC")+
  labs(subtitle=plots[i,"title"])
}


tiempo_antes_fits2<-Sys.time()


newtime0 = seq(from=round(min(ms_d_match_surv$time),0), to= round(max(ms_d_match_surv$time),0))

#_#_#_#_#_#_#_#_#_#_#_#_#_
#covariates

#Specify the formula
fitform2 <- Surv(time, status) ~  factor(tipo_de_plan_res_1)


kernel_haz_est2a<-list()
kernel_haz_est2b<-list()
kernel_haz2a<-list()
kernel_haz2b<-list()
for (i in 1:n_trans){
library("muhaz")
kernel_haz_est2a[[i]] <- muhaz(ms_d_match_surv[which(ms_d_match_surv$trans==i &
                        ms_d_match_surv$tipo_de_plan_res_1==1),"time"],
                        ms_d_match_surv[which(ms_d_match_surv$trans==i &
                        ms_d_match_surv$tipo_de_plan_res_1==1),"status"])
kernel_haz2a[[i]] <- data.table(time = kernel_haz_est2a[[i]]$est.grid,
                         est = kernel_haz_est2a[[i]]$haz.est,
                         dist = "Kernel density")
kernel_haz_est2b[[i]] <- muhaz(ms_d_match_surv[which(ms_d_match_surv$trans==i &
                        ms_d_match_surv$tipo_de_plan_res_1==0),"time"],
                        ms_d_match_surv[which(ms_d_match_surv$trans==i &
                        ms_d_match_surv$tipo_de_plan_res_1==0),"status"])
kernel_haz2b[[i]] <- data.table(time = kernel_haz_est2b[[i]]$est.grid,
                         est = kernel_haz_est2b[[i]]$haz.est,
                         dist = "Kernel density")
}

haz_int_only2<-
  rbind(cbind(trans=rep(1:n_trans,each=nrow(kernel_haz2a[[i]])),
              tipo_de_plan_res_1=rep(1,nrow(kernel_haz2a[[i]])),
      rbindlist(kernel_haz2a)),
      cbind(trans=rep(1:n_trans,each=nrow(kernel_haz2b[[i]])),
            tipo_de_plan_res_1=rep(0,nrow(kernel_haz2b[[i]])),
      rbindlist(kernel_haz2b)))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

n_trans2 <- max(trans_matrix, na.rm = T)

dists_w_covs_10s <- cbind.data.frame(covs=c(rep("fits_",9*n_trans2)),
              formal=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
                "Generalized gamma", "Lognormal", "Exponential", "Generalized F"),1*n_trans2),
              dist=c("weibull", "weibullph", "llogis", "gamma", "gengamma", "gompertz", "lnorm", "exp", "genf"),
              model=rep(c("wei2", "weiph2", "gomp2", "llogis2", "gam2","ggam2", "logn2", "exp2", "genf2"),1*n_trans2),
              trans=rep(1:n_trans2, each=9))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
no_attempts <- 20

fits_wei2 <- vector(mode = "list", length = n_trans2)
fits_weiph2 <- vector(mode = "list", length = n_trans2)
fits_llogis2 <- vector(mode = "list", length = n_trans2)
fits_gomp2 <- vector(mode = "list", length = n_trans2)
fits_logn2 <- vector(mode = "list", length = n_trans2)
fits_exp2 <- vector(mode = "list", length = n_trans2)
fits_gam2 <- vector(mode = "list", length = n_trans2)
fits_ggam2 <- vector(mode = "list", length = n_trans2)
fits_genf2 <- vector(mode = "list", length = n_trans2)
fits_c_wei2 <- vector(mode = "list", length = n_trans2)
fits_c_weiph2 <- vector(mode = "list", length = n_trans2)
fits_c_llogis2 <- vector(mode = "list", length = n_trans2)
fits_c_gomp2 <- vector(mode = "list", length = n_trans2)
fits_c_logn2 <- vector(mode = "list", length = n_trans2)
fits_c_exp2 <- vector(mode = "list", length = n_trans2)
fits_c_gam2 <- vector(mode = "list", length = n_trans2)
fits_c_ggam2 <- vector(mode = "list", length = n_trans2)
fits_c_genf2 <- vector(mode = "list", length = n_trans2)

km.lc2a<-list()
km.lc2b<-list()
fits_cox2<-list()
fits_c_cox2<-list()

for (i in 1:n_trans2){
  r <- NULL
  attempt <- 0
  while( is.null(r) && attempt <= no_attempts ) {
    attempt <- attempt + 1
    try(
      r <- flexsurvreg(formula=fitform2,
                                 data = subset(ms_d_match_surv, trans == i),
                                 dist = "weibull")
    )
  } 
  fits_wei2[[i]] <- r
}

for (i in 1:n_trans2){
  r <- NULL
  attempt <- 0
  while( is.null(r) && attempt <= no_attempts ) {
    attempt <- attempt + 1
    try(
      r <- flexsurvreg(formula=fitform2,
                                 data = subset(ms_d_match_surv, trans == i),
                                 dist = "weibullph")
    )
  } 
  fits_weiph2[[i]] <- r
}

for (i in 1:n_trans2){
  r <- NULL
  attempt <- 0
  while( is.null(r) && attempt <= no_attempts ) {
    attempt <- attempt + 1
    try(
      r <- flexsurvreg(formula=fitform2,
                                 data = subset(ms_d_match_surv, trans == i),
                                 dist = "llogis")
    )
  } 
  fits_llogis2[[i]] <- r
}

for (i in 1:n_trans2){
  r <- NULL
  attempt <- 0
  while( is.null(r) && attempt <= no_attempts ) {
    attempt <- attempt + 1
    try(
      r <- flexsurvreg(formula=fitform2,
                                 data = subset(ms_d_match_surv, trans == i),
                                 dist = "gamma")
    )
  } 
  fits_gam2[[i]] <- r
}

for (i in 1:n_trans2){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
      try(
        r <- flexsurvreg(formula=fitform2,
                                   data = subset(ms_d_match_surv, trans == i),
                                   dist = "gengamma")
      )
    }
    fits_ggam2[[i]] <- r
}

for (i in 1:n_trans2){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
      try(
        r <- flexsurvreg(formula=fitform2,
                                   data = subset(ms_d_match_surv, trans == i),
                                   dist = "gompertz")
      )
    }
    fits_gomp2[[i]] <- r
}  
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.95053822592927,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.74418625106962,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
## Error in optim(method = "BFGS", par = c(shape = 0.001, rate = -5.6599135477941,  : 
##   valor inicial en 'vmmin' no es finito
for (i in 1:n_trans2){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
      try(
        r <- flexsurvreg(formula=fitform2,
                                   data = subset(ms_d_match_surv, trans == i),
                                   dist = "lnorm")
      )
    }
    fits_logn2[[i]] <- r
}  

for (i in 1:n_trans2){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
      try(
        r <- flexsurvreg(formula=fitform2,
                                   data = subset(ms_d_match_surv, trans == i),
                                   dist = "exp")
      )
    }
    fits_exp2[[i]] <- r
}  


for (i in 1:n_trans2){
    r <- NULL
    attempt <- 0
    while( is.null(r) && attempt <= no_attempts ) {
      attempt <- attempt + 1
      try(
        r <- flexsurvreg(formula=fitform2,
                                   data = subset(ms_d_match_surv, trans == i),
                                   dist = "genf")
      )
    }
    fits_genf2[[i]] <- r
}  

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

fitted_km<-data.frame()
for (i in 1:n_trans2){
km.lc2a[[i]] <- survfit(formula= fitform, data = subset(ms_d_match_surv, trans == i & tipo_de_plan_res_1==1))
km.lc2b[[i]] <- survfit(formula= fitform, data = subset(ms_d_match_surv, trans == i & tipo_de_plan_res_1==0))

fitted_km<-rbind(fitted_km,cbind.data.frame(trans=i,residential=rep(1,), fortify(km.lc2a[[i]])))
fitted_km<-rbind(fitted_km,cbind.data.frame(trans=i,residential=rep(0,), fortify(km.lc2b[[i]])))
}

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

newdat2a <- data.table::data.table(tipo_de_plan_res_1= factor(c(rep(1,1))))
newdat2b <- data.table::data.table(tipo_de_plan_res_1= factor(c(rep(0,1))))

fitted_flexsurvreg<-data.frame()
fit_flexsurvreg<-data.frame()

for (i in 1:nrow(dists_w_covs_10s)){  #
  
cat(paste0("#### Flexible Survival Model (w/ covs): ",
             dists_w_covs_10s[i,"formal"], "; transition: ",dists_w_covs_10s[i,"trans"], "\n \n"))  
  
model<-paste0("fits_",dists_w_covs_10s[i,"model"])

if(!is.null(get(model)[[dists_w_covs_10s[i,"trans"]]])){  
  #Generate databases
 fitted_flexsurvreg<- rbind(fitted_flexsurvreg,cbind.data.frame(dist=rep(dists_w_covs_10s[i,"formal"],), 
                            trans=rep(dists_w_covs_10s[i,"trans"],),
                            residential=rep(1,),
                            data.table::data.table(summary(get(model)[[dists_w_covs_10s[i,"trans"]]], newdata= newdat2a, newtime=unique(fitted_km$time), type = "survival", tidy=T)))) 
  fitted_flexsurvreg<- rbind(fitted_flexsurvreg,cbind.data.frame(dist=rep(dists_w_covs_10s[i,"formal"],), 
                            trans=rep(dists_w_covs_10s[i,"trans"],),
                            residential=rep(0,),
                            data.table::data.table(summary(get(model)[[dists_w_covs_10s[i,"trans"]]], newdata= newdat2b,  newtime=unique(fitted_km$time), type = "survival", tidy=T)))) 
 #t=newtime0, 
 
  # Generate fit indices
  fit_flexsurvreg<-rbind(fit_flexsurvreg,
     cbind(dist= dists_w_covs_10s[i,"formal"],
           transition=dists_w_covs_10s[i,"trans"],
           fitstats.flexsurvreg(get(model)[[dists_w_covs_10s[i,"trans"]]])))
  #the BIC may not be appropriate if none of the candidate models are considered to be close to the ‘true’ model.     
  } else {
  cat(paste0("The model that assumed a ",dists_w_covs_10s[i,"formal"]," distribution for the transition number ",dists_w_covs_10s[i,"trans"]," did not converge \n\n"))
  }
}
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 1
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 1
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 1
##  
## The model that assumed a Gompertz distribution for the transition number 1 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 1
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 1
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 1
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 1
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 1
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 1
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 2
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 2
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 2
##  
## The model that assumed a Gompertz distribution for the transition number 2 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 2
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 2
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 2
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 2
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 2
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 2
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 3
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 3
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 3
##  
## The model that assumed a Gompertz distribution for the transition number 3 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 3
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 3
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 3
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 3
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 3
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 3
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 4
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 4
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 4
##  
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 4
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 4
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 4
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 4
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 4
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 4
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 5
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 5
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 5
##  
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 5
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 5
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 5
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 5
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 5
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 5
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 6
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 6
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 6
##  
## The model that assumed a Gompertz distribution for the transition number 6 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 6
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 6
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 6
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 6
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 6
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 6
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 7
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 7
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 7
##  
## The model that assumed a Gompertz distribution for the transition number 7 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 7
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 7
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 7
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 7
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 7
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 7
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 8
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 8
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 8
##  
## The model that assumed a Gompertz distribution for the transition number 8 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 8
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 8
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 8
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 8
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 8
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 8
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 9
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 9
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 9
##  
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 9
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 9
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 9
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 9
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 9
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 9
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 10
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 10
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 10
##  
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 10
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 10
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 10
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 10
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 10
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 10
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 11
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 11
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 11
##  
## The model that assumed a Gompertz distribution for the transition number 11 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 11
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 11
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 11
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 11
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 11
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 11
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 12
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 12
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 12
##  
## The model that assumed a Gompertz distribution for the transition number 12 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 12
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 12
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 12
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 12
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 12
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 12
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 13
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 13
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 13
##  
## The model that assumed a Gompertz distribution for the transition number 13 did not converge 
## 
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 13
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 13
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 13
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 13
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 13
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 13
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 14
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 14
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 14
##  
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 14
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 14
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 14
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 14
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 14
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 14
##  
## #### Flexible Survival Model (w/ covs): Weibull (AFT); transition: 15
##  
## #### Flexible Survival Model (w/ covs): Weibull (PH); transition: 15
##  
## #### Flexible Survival Model (w/ covs): Gompertz; transition: 15
##  
## #### Flexible Survival Model (w/ covs): Log-logistic; transition: 15
##  
## #### Flexible Survival Model (w/ covs): Gamma; transition: 15
##  
## #### Flexible Survival Model (w/ covs): Generalized gamma; transition: 15
##  
## #### Flexible Survival Model (w/ covs): Lognormal; transition: 15
##  
## #### Flexible Survival Model (w/ covs): Exponential; transition: 15
##  
## #### Flexible Survival Model (w/ covs): Generalized F; transition: 15
## 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
if(no_mostrar==1){
  fit_flexsurvreg %>% 
    dplyr::group_by(trans) %>% 
    summarise(mean(ucl,na.rm=T))
  }
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#Calculate error
#c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma","Generalized gamma", "Lognormal", "Exponential", "Generalized F")

fitted_flexsurvreg_binned_mix<-
data.frame(fitted_flexsurvreg)[,c("dist","trans","residential","time","est","lcl","ucl")] %>% 
  dplyr::left_join(fitted_km, by=c("trans","residential","time")) %>% 
#there are many observations that was not available due to empirical missing data
#dplyr::filter(!is.na(surv))
  dplyr::group_by(dist,trans,residential) %>% 
  #dplyr::mutate(est= ifelse(is.na(est), mean(est, na.rm=TRUE), est)) %>% 
  #dplyr::mutate(surv= ifelse(is.na(surv), mean(surv, na.rm=TRUE), surv)) %>% 
  tidyr::fill(surv,.direction="updown") %>% 
  tidyr::fill(est,.direction="updown") %>% 
  ungroup()

db_for_apply_rmse<-
  cbind.data.frame(residential=rep(c("0","1"),each=9,n_trans2),
      dist=rep(c("Weibull (AFT)", "Weibull (PH)", "Gompertz", "Log-logistic", "Gamma",
                "Generalized gamma", "Lognormal", "Exponential", "Generalized F"),2*n_trans2),
      trans=rep(c(1:n_trans2),each=9*2))
   
rmse_comp_fits<- data.frame()
for(i in 1:nrow(db_for_apply_rmse)){
  rmse<- Metrics::rmse(subset(fitted_flexsurvreg_binned_mix,residential==db_for_apply_rmse[i,"residential"] & 
                       dist==db_for_apply_rmse[i,"dist"] & 
                       trans==db_for_apply_rmse[i,"trans"])$est,
              subset(fitted_flexsurvreg_binned_mix,residential==db_for_apply_rmse[i,"residential"] & 
                       dist==db_for_apply_rmse[i,"dist"] & 
                       trans==db_for_apply_rmse[i,"trans"])$surv)

  rmse_comp_fits<- rbind(rmse_comp_fits,cbind(dist=db_for_apply_rmse[i,"dist"],
                                                  residential=db_for_apply_rmse[i,"residential"],
                                                  trans=db_for_apply_rmse[i,"trans"],
                                                  rmse=rmse))
}

rmse_comp_fits<-
  rmse_comp_fits %>% 
    dplyr::filter(rmse!="NaN") %>%  
      tidyr::pivot_wider(names_from = residential, values_from = rmse) %>% 
      dplyr::rename("out"="0","res"="1") %>% 
      dplyr::mutate(out=as.numeric(out),res=as.numeric(res)) %>% 
      dplyr::mutate(mean_rmse=rowSums(.[3:4],na.rm=T)/2) %>% 
      dplyr::arrange(trans,dist, mean_rmse)%>%
      dplyr::mutate(mean_rmse=round(mean_rmse,4))

setting_type_label<- c(`0`="Outpatient",`1`="Residential") 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#http://colorschemedesigner.com/csd-3.5/#
#https://www.r-graph-gallery.com/42-colors-names.html

plot_fitted_flexsurvreg<-
    data.frame(fitted_flexsurvreg)[,c("dist","trans","residential","time","est","lcl","ucl")] %>% 
      dplyr::mutate(residential=as.factor(residential)) %>% 
    ggplot()+
    geom_line(aes(x=time, y=est, color=dist),size=1)+
    geom_line(data=fitted_km,aes(time, surv),color="red",size=1, linetype=4)+
    #geom_ribbon(aes(x = time, ymin = lcl, ymax = ucl, fill = dist), alpha = 0.2)+
    scale_color_manual(name="Distributions", values =                         c("gray60","gray20","darkseagreen3","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","navyblue")) +
    scale_fill_manual(name="Distributions", values = c("gray60","gray20","darkseagreen3","#D3A347","#4F3C91","coral4","#085754","lightpink","turquoise","navyblue")) +
    facet_wrap(residential~trans,labeller = labeller(trans = transition_label, residential=setting_type_label))+
    sjPlot::theme_sjplot2()+
    #ylim(0,.3)+
    scale_x_continuous(breaks = seq(0, max(newtime0,na.rm=T), 1000), limits=c(0,max(newtime0,na.rm=T)))+
    theme(legend.position="bottom",
          strip.background = element_rect(fill = "white", colour = "white"))+
    labs(y="Survival",x="Time (days)",caption="Note. Kernel density, stratified by type of setting")


plot_fitted_flexsurvreg
Figure 20. Vissual Assessment of Suvival, Ten-states Model (w/ covars)

Figure 20. Vissual Assessment of Suvival, Ten-states Model (w/ covars)

## http://www.statistica.it/gianluca/teaching/r-hta-workshop/2019/Bullement.pdf
tiempo_despues_fits2<-Sys.time()

paste0("Time in process: ");tiempo_despues_fits2-tiempo_antes_fits2
## [1] "Time in process: "
## Time difference of 8.102439 mins
#13 minutos aprox. en DELL
options(warn=0)

if(no_mostrar==1){
jpeg("eso4.jpg", height=15, width= 15, res= 96, units = "in")
plot_fitted_flexsurvreg
dev.off()
}
fit_flexurvreg_kable<-
fit_flexsurvreg %>% 
  dplyr::arrange(dist, transition, AIC) %>% 
  dplyr::left_join(cbind.data.frame(transition_label,trans_nmb=1:n_trans2),by=c("transition"="trans_nmb")) %>% 
  dplyr::mutate(trans_w_nmb=paste0(transition,")",transition_label)) %>% 
  dplyr::select(trans_w_nmb,dist,Df,n2ll,AIC,AICc,BIC)  
  
fit_flexurvreg_kable %>%   
      knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 11. Fit indices of the adjusted survival analyses"),
               col.names = c("Transition","Distribution", "DF","Negative 2 Log Likelihood","AIC","AICc","BIC"),
               align =c("l",rep('c', 101))) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 12) %>%
  #kableExtra::add_footnote("Note. NA= Null values", notation="none") %>% 
  kableExtra::scroll_box(width = "100%", height = "375px") 
Table 11. Fit indices of the adjusted survival analyses
Transition Distribution DF Negative 2 Log Likelihood AIC AICc BIC
1)Admission to TD Exponential 2 92,564.572 92,568.572 92,568.572 92,584.651
2)Admission to DWCA Exponential 2 198,236.649 198,240.649 198,240.650 198,256.728
3)Admission to Readmission Exponential 2 16,555.830 16,559.830 16,559.830 16,575.909
4)TD to Readmission Exponential 2 29,170.749 29,174.749 29,174.751 29,187.989
5)DWCA to Readmission Exponential 2 76,320.423 76,324.423 76,324.424 76,339.404
6)Readmission to TD2 Exponential 2 23,393.575 23,397.575 23,397.577 23,411.102
7)Readmission to DWCA2 Exponential 2 53,567.586 53,571.586 53,571.588 53,585.114
8)Readmission to Readmission2 Exponential 2 5,704.357 5,708.357 5,708.359 5,721.884
9)TD2 to Readmission2 Exponential 2 7,779.247 7,783.247 7,783.256 7,793.761
10)DWCA2 to Readmission2 Exponential 2 23,296.027 23,300.027 23,300.031 23,312.443
11)Readmission2 to TD3 Exponential 2 6,544.187 6,548.187 6,548.193 6,559.400
12)Readmission2 to DWCA3 Exponential 2 16,329.152 16,333.152 16,333.158 16,344.364
13)Readmission2 to Readmission3 Exponential 2 2,087.162 2,091.162 2,091.168 2,102.375
14)TD3 to Readmission3 Exponential 2 2,232.497 2,236.497 2,236.528 2,244.455
15)DWCA3 to Readmission3 Exponential 2 7,247.732 7,251.732 7,251.743 7,261.789
1)Admission to TD Gamma 3 92,522.715 92,528.715 92,528.716 92,552.834
2)Admission to DWCA Gamma 3 194,304.455 194,310.455 194,310.456 194,334.574
3)Admission to Readmission Gamma 3 16,373.765 16,379.765 16,379.766 16,403.883
4)TD to Readmission Gamma 3 28,245.837 28,251.837 28,251.841 28,271.697
5)DWCA to Readmission Gamma 3 73,374.335 73,380.335 73,380.337 73,402.806
6)Readmission to TD2 Gamma 3 23,346.190 23,352.190 23,352.193 23,372.481
7)Readmission to DWCA2 Gamma 3 52,920.473 52,926.473 52,926.477 52,946.764
8)Readmission to Readmission2 Gamma 3 5,593.005 5,599.005 5,599.009 5,619.297
9)TD2 to Readmission2 Gamma 3 7,582.457 7,588.457 7,588.474 7,604.228
10)DWCA2 to Readmission2 Gamma 3 22,581.497 22,587.497 22,587.504 22,606.120
11)Readmission2 to TD3 Gamma 3 6,513.454 6,519.454 6,519.466 6,536.274
12)Readmission2 to DWCA3 Gamma 3 16,195.288 16,201.288 16,201.300 16,218.107
13)Readmission2 to Readmission3 Gamma 3 2,034.743 2,040.743 2,040.755 2,057.562
14)TD3 to Readmission3 Gamma 3 2,214.401 2,220.401 2,220.462 2,232.337
15)DWCA3 to Readmission3 Gamma 3 7,043.958 7,049.958 7,049.979 7,065.043
1)Admission to TD Generalized F 5 87,704.500 87,714.500 87,714.502 87,754.698
2)Admission to DWCA Generalized F 5 186,868.327 186,878.327 186,878.330 186,918.525
3)Admission to Readmission Generalized F 5 16,106.711 16,116.711 16,116.713 16,156.909
4)TD to Readmission Generalized F 5 28,187.824 28,197.824 28,197.834 28,230.924
5)DWCA to Readmission Generalized F 5 73,039.825 73,049.825 73,049.830 73,087.276
6)Readmission to TD2 Generalized F 5 22,512.332 22,522.332 22,522.342 22,556.151
7)Readmission to DWCA2 Generalized F 5 51,267.239 51,277.239 51,277.249 51,311.058
8)Readmission to Readmission2 Generalized F 5 5,489.012 5,499.012 5,499.022 5,532.831
9)TD2 to Readmission2 Generalized F 5 7,575.901 7,585.901 7,585.943 7,612.186
10)DWCA2 to Readmission2 Generalized F 5 22,508.258 22,518.258 22,518.274 22,549.296
11)Readmission2 to TD3 Generalized F 5 6,283.515 6,293.515 6,293.545 6,321.547
12)Readmission2 to DWCA3 Generalized F 5 15,720.330 15,730.330 15,730.360 15,758.362
13)Readmission2 to Readmission3 Generalized F 5 2,005.608 2,015.608 2,015.638 2,043.640
14)TD3 to Readmission3 Generalized F 5 2,213.161 2,223.161 2,223.315 2,243.055
15)DWCA3 to Readmission3 Generalized F 5 7,005.667 7,015.667 7,015.721 7,040.809
1)Admission to TD Generalized gamma 4 90,881.314 90,889.314 90,889.316 90,921.472
2)Admission to DWCA Generalized gamma 4 188,900.297 188,908.297 188,908.299 188,940.456
3)Admission to Readmission Generalized gamma 4 16,113.065 16,121.065 16,121.066 16,153.223
4)TD to Readmission Generalized gamma 4 28,193.303 28,201.303 28,201.311 28,227.784
5)DWCA to Readmission Generalized gamma 4 73,063.853 73,071.853 73,071.856 73,101.814
6)Readmission to TD2 Generalized gamma 4 23,061.878 23,069.878 23,069.884 23,096.933
7)Readmission to DWCA2 Generalized gamma 4 51,791.304 51,799.304 51,799.311 51,826.359
8)Readmission to Readmission2 Generalized gamma 4 5,489.012 5,497.012 5,497.018 5,524.067
9)TD2 to Readmission2 Generalized gamma 4 7,578.818 7,586.818 7,586.847 7,607.846
10)DWCA2 to Readmission2 Generalized gamma 4 22,515.967 22,523.967 22,523.978 22,548.798
11)Readmission2 to TD3 Generalized gamma 4 6,339.686 6,347.686 6,347.706 6,370.112
12)Readmission2 to DWCA3 Generalized gamma 4 15,881.440 15,889.440 15,889.460 15,911.866
13)Readmission2 to Readmission3 Generalized gamma 4 2,007.705 2,015.705 2,015.725 2,038.131
14)TD3 to Readmission3 Generalized gamma 4 2,213.772 2,221.772 2,221.874 2,237.687
15)DWCA3 to Readmission3 Generalized gamma 4 7,014.785 7,022.785 7,022.821 7,042.898
4)TD to Readmission Gompertz 3 28,501.255 28,507.255 28,507.259 28,527.115
5)DWCA to Readmission Gompertz 3 73,870.586 73,876.586 73,876.588 73,899.057
9)TD2 to Readmission2 Gompertz 3 7,650.375 7,656.375 7,656.392 7,672.146
10)DWCA2 to Readmission2 Gompertz 3 22,681.959 22,687.959 22,687.965 22,706.582
14)TD3 to Readmission3 Gompertz 3 2,218.455 2,224.455 2,224.516 2,236.392
15)DWCA3 to Readmission3 Gompertz 3 7,038.705 7,044.705 7,044.727 7,059.790
1)Admission to TD Log-logistic 3 90,552.665 90,558.665 90,558.666 90,582.784
2)Admission to DWCA Log-logistic 3 189,532.414 189,538.414 189,538.415 189,562.533
3)Admission to Readmission Log-logistic 3 16,351.994 16,357.994 16,357.995 16,382.113
4)TD to Readmission Log-logistic 3 28,199.065 28,205.065 28,205.069 28,224.925
5)DWCA to Readmission Log-logistic 3 73,150.409 73,156.409 73,156.411 73,178.879
6)Readmission to TD2 Log-logistic 3 22,994.386 23,000.386 23,000.389 23,020.677
7)Readmission to DWCA2 Log-logistic 3 51,822.764 51,828.764 51,828.768 51,849.055
8)Readmission to Readmission2 Log-logistic 3 5,582.781 5,588.781 5,588.784 5,609.072
9)TD2 to Readmission2 Log-logistic 3 7,578.756 7,584.756 7,584.773 7,600.527
10)DWCA2 to Readmission2 Log-logistic 3 22,524.080 22,530.080 22,530.087 22,548.703
11)Readmission2 to TD3 Log-logistic 3 6,424.805 6,430.805 6,430.817 6,447.624
12)Readmission2 to DWCA3 Log-logistic 3 15,883.446 15,889.446 15,889.458 15,906.265
13)Readmission2 to Readmission3 Log-logistic 3 2,031.264 2,037.264 2,037.276 2,054.083
14)TD3 to Readmission3 Log-logistic 3 2,213.944 2,219.944 2,220.006 2,231.881
15)DWCA3 to Readmission3 Log-logistic 3 7,018.517 7,024.517 7,024.538 7,039.601
1)Admission to TD Lognormal 3 90,887.607 90,893.607 90,893.608 90,917.725
2)Admission to DWCA Lognormal 3 189,482.277 189,488.277 189,488.278 189,512.396
3)Admission to Readmission Lognormal 3 16,202.902 16,208.902 16,208.903 16,233.021
4)TD to Readmission Lognormal 3 28,201.197 28,207.197 28,207.201 28,227.057
5)DWCA to Readmission Lognormal 3 73,064.295 73,070.295 73,070.297 73,092.766
6)Readmission to TD2 Lognormal 3 23,071.777 23,077.777 23,077.780 23,098.068
7)Readmission to DWCA2 Lognormal 3 51,844.615 51,850.615 51,850.619 51,870.907
8)Readmission to Readmission2 Lognormal 3 5,527.111 5,533.111 5,533.115 5,553.402
9)TD2 to Readmission2 Lognormal 3 7,594.769 7,600.769 7,600.786 7,616.540
10)DWCA2 to Readmission2 Lognormal 3 22,520.123 22,526.123 22,526.130 22,544.746
11)Readmission2 to TD3 Lognormal 3 6,384.862 6,390.862 6,390.874 6,407.682
12)Readmission2 to DWCA3 Lognormal 3 15,892.561 15,898.561 15,898.573 15,915.380
13)Readmission2 to Readmission3 Lognormal 3 2,014.327 2,020.327 2,020.339 2,037.146
14)TD3 to Readmission3 Lognormal 3 2,220.563 2,226.563 2,226.624 2,238.500
15)DWCA3 to Readmission3 Lognormal 3 7,015.070 7,021.070 7,021.091 7,036.154
1)Admission to TD Weibull (AFT) 3 92,559.332 92,565.332 92,565.333 92,589.451
2)Admission to DWCA Weibull (AFT) 3 192,752.723 192,758.723 192,758.724 192,782.842
3)Admission to Readmission Weibull (AFT) 3 16,403.759 16,409.759 16,409.760 16,433.878
4)TD to Readmission Weibull (AFT) 3 28,221.433 28,227.433 28,227.437 28,247.293
5)DWCA to Readmission Weibull (AFT) 3 73,264.014 73,270.014 73,270.016 73,292.484
6)Readmission to TD2 Weibull (AFT) 3 23,383.257 23,389.257 23,389.261 23,409.548
7)Readmission to DWCA2 Weibull (AFT) 3 52,605.147 52,611.147 52,611.151 52,631.438
8)Readmission to Readmission2 Weibull (AFT) 3 5,611.970 5,617.970 5,617.974 5,638.262
9)TD2 to Readmission2 Weibull (AFT) 3 7,579.442 7,585.442 7,585.459 7,601.213
10)DWCA2 to Readmission2 Weibull (AFT) 3 22,552.760 22,558.760 22,558.767 22,577.383
11)Readmission2 to TD3 Weibull (AFT) 3 6,532.069 6,538.069 6,538.081 6,554.888
12)Readmission2 to DWCA3 Weibull (AFT) 3 16,115.976 16,121.976 16,121.988 16,138.795
13)Readmission2 to Readmission3 Weibull (AFT) 3 2,042.783 2,048.783 2,048.795 2,065.602
14)TD3 to Readmission3 Weibull (AFT) 3 2,213.897 2,219.897 2,219.958 2,231.833
15)DWCA3 to Readmission3 Weibull (AFT) 3 7,032.268 7,038.268 7,038.289 7,053.352
1)Admission to TD Weibull (PH) 3 92,559.332 92,565.332 92,565.333 92,589.451
2)Admission to DWCA Weibull (PH) 3 192,752.723 192,758.723 192,758.724 192,782.842
3)Admission to Readmission Weibull (PH) 3 16,403.759 16,409.759 16,409.760 16,433.878
4)TD to Readmission Weibull (PH) 3 28,221.433 28,227.433 28,227.437 28,247.293
5)DWCA to Readmission Weibull (PH) 3 73,264.014 73,270.014 73,270.016 73,292.484
6)Readmission to TD2 Weibull (PH) 3 23,383.257 23,389.257 23,389.261 23,409.548
7)Readmission to DWCA2 Weibull (PH) 3 52,605.147 52,611.147 52,611.151 52,631.438
8)Readmission to Readmission2 Weibull (PH) 3 5,611.970 5,617.970 5,617.974 5,638.262
9)TD2 to Readmission2 Weibull (PH) 3 7,579.442 7,585.442 7,585.459 7,601.213
10)DWCA2 to Readmission2 Weibull (PH) 3 22,552.760 22,558.760 22,558.767 22,577.383
11)Readmission2 to TD3 Weibull (PH) 3 6,532.069 6,538.069 6,538.081 6,554.888
12)Readmission2 to DWCA3 Weibull (PH) 3 16,115.976 16,121.976 16,121.988 16,138.795
13)Readmission2 to Readmission3 Weibull (PH) 3 2,042.783 2,048.783 2,048.795 2,065.602
14)TD3 to Readmission3 Weibull (PH) 3 2,213.897 2,219.897 2,219.958 2,231.833
15)DWCA3 to Readmission3 Weibull (PH) 3 7,032.268 7,038.268 7,038.289 7,053.352


rmse_plot<-
rmse_comp_fits %>% 
  dplyr::mutate(trans=as.numeric(trans)) %>% 
    dplyr::left_join(cbind.data.frame(transition_label,trans_nmb=1:n_trans2),by=c("trans"="trans_nmb")) %>% 
  dplyr::mutate(trans_w_nmb=paste0(trans,")",transition_label), 
        trans_w_nmb=factor(trans_w_nmb,levels=unique(fit_flexurvreg_kable$trans_w_nmb))) %>% 
   ggplot()+
  geom_bar(aes(x=dist, y=mean_rmse), position="dodge", stat="identity", alpha=0.4)+
  sjPlot::theme_sjplot2()+
  #geom_errorbar(aes(x=t, ymin=L, ymax=U, color=program), position="dodge", stat="identity", width=0.4,  alpha=0.9, size=1.3)+
  facet_wrap(.~trans_w_nmb, ncol=3, scales="free_y", dir="v") + 
  xlab("") + 
  ylab("")+
  #ylab("State occupancy probabilities") + 
  theme_minimal()+
  theme(legend.position="bottom")+
    theme(panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        strip.background = element_blank(),
        panel.border = element_blank(),
          legend.box.background = element_blank())+
  theme(axis.text.x = element_text(angle = 90, hjust=0.3, vjust=0))

rmse_plot
Figure 21. Suvival RMSEs, Ten-states Model (w/ covars)

Figure 21. Suvival RMSEs, Ten-states Model (w/ covars)

if(no_mostrar==1){
jpeg("eso42.jpg", height=15, width= 10, res= 96, units = "in")
rmse_plot
dev.off()
}

Session Info

path<-rstudioapi::getSourceEditorContext()$path

Sys.getenv("R_LIBS_USER")
## [1] "C:/Users/CISS Fondecyt/OneDrive/Documentos/R/win-library/4.0"
rstudioapi::getSourceEditorContext()
## Document Context: 
## - id:        'CAB0934D'
## - path:      'G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/SUD_CL/Matching_Process_APR_21.Rmd'
## - contents:  <5288 rows>
## Document Selection:
## - [44, 20] -- [44, 20]: ''
#save.image("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state.RData")

if (grepl("CISS Fondecyt",path)==T){
    save.image("C:/Users/CISS Fondecyt/OneDrive/Escritorio/SUD_CL/mult_state.RData")
  } else if (grepl("andre",path)==T){
    save.image("C:/Users/andre/Desktop/SUD_CL/mult_state.RData")
  } else if (grepl("E:",path)==T){
    save.image("E:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state.RData")
  } else {
    save.image("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state.RData")
  }

sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] muhaz_1.2.6.1           mstate_0.3.1            Epi_2.40               
##  [4] lubridate_1.7.9         Amelia_1.7.6            compareGroups_4.4.5    
##  [7] DiagrammeR_1.0.6.1.9000 gurobi_9.1-0            radiant.update_1.4.1   
## [10] survHE_1.1.2            flexsurv_2.0            Rcpp_1.0.5             
## [13] eha_2.8.1               cobalt_4.2.3            sensitivityfull_1.5.6  
## [16] sensitivity2x2xk_1.01   MatchIt_3.0.2           tableone_0.12.0        
## [19] stargazer_5.2.2         reshape2_1.4.4          exactRankTests_0.8-31  
## [22] gridExtra_2.3           foreign_0.8-80          glpkAPI_1.3.2          
## [25] designmatch_0.3.1       Rglpk_0.6-4             slam_0.1-47            
## [28] MASS_7.3-51.6           survMisc_0.5.5          ggfortify_0.4.10       
## [31] rateratio.test_1.0-2    survminer_0.4.8         ggpubr_0.4.0           
## [34] epiR_1.0-15             forcats_0.5.0           purrr_0.3.4            
## [37] readr_1.3.1             tibble_3.0.3            tidyverse_1.3.0        
## [40] treemapify_2.5.3        ggiraph_0.7.0           chilemapas_0.2         
## [43] sf_0.9-3                finalfit_1.0.1          lsmeans_2.30-0         
## [46] emmeans_1.4.8           choroplethrAdmin1_1.1.1 choroplethrMaps_1.0.1  
## [49] choroplethr_3.6.3       acs_2.1.4               XML_3.99-0.3           
## [52] RColorBrewer_1.1-2      panelr_0.7.3            lme4_1.1-23            
## [55] Matrix_1.2-18           dplyr_1.0.1             data.table_1.13.0      
## [58] codebook_0.9.2          devtools_2.3.0          usethis_1.6.1          
## [61] sqldf_0.4-11            RSQLite_2.2.0           gsubfn_0.7             
## [64] proto_1.0.0             broom_0.7.0             zoo_1.8-8              
## [67] altair_4.0.1            rbokeh_0.5.1            janitor_2.0.1          
## [70] plotly_4.9.2.1          kableExtra_1.1.0        Hmisc_4.4-0            
## [73] Formula_1.2-3           survival_3.1-12         lattice_0.20-41        
## [76] ggplot2_3.3.2           stringr_1.4.0           stringi_1.4.6          
## [79] tidyr_1.1.1             knitr_1.29              matrixStats_0.56.0     
## [82] boot_1.3-25            
## 
## loaded via a namespace (and not attached):
##   [1] class_7.3-17         ps_1.3.3             rprojroot_1.3-2     
##   [4] crayon_1.3.4         V8_3.1.0             nlme_3.1-148        
##   [7] backports_1.1.7      reprex_0.3.0         rlang_0.4.7         
##  [10] readxl_1.3.1         performance_0.4.8    SparseM_1.78        
##  [13] nloptr_1.2.2.2       callr_3.4.3          flextable_0.5.10    
##  [16] rjson_0.2.20         cmprsk_2.2-10        ggmap_3.0.0         
##  [19] bit64_0.9-7          glue_1.4.1           loo_2.2.0           
##  [22] sjPlot_2.8.4         rstan_2.19.3         parallel_4.0.2      
##  [25] processx_3.4.3       classInt_0.4-3       tcltk_4.0.2         
##  [28] haven_2.3.1          tidyselect_1.1.0     km.ci_0.5-2         
##  [31] rio_0.5.16           sjmisc_2.8.5         chron_2.3-55        
##  [34] xtable_1.8-4         MatrixModels_0.4-1   magrittr_1.5        
##  [37] evaluate_0.14        gdtools_0.2.2        RgoogleMaps_1.4.5.3 
##  [40] cli_2.0.2            rstudioapi_0.11      sp_1.4-2            
##  [43] rpart_4.1-15         jtools_2.0.5         sjlabelled_1.1.6    
##  [46] RJSONIO_1.3-1.4      maps_3.3.0           gistr_0.5.0         
##  [49] xfun_0.16            parameters_0.8.2     inline_0.3.15       
##  [52] pkgbuild_1.1.0       cluster_2.1.0        ggfittext_0.9.0     
##  [55] quantreg_5.61        png_0.1-7            withr_2.2.0         
##  [58] bitops_1.0-6         plyr_1.8.6           cellranger_1.1.0    
##  [61] e1071_1.7-3          survey_4.0           coda_0.19-3         
##  [64] pillar_1.4.6         multcomp_1.4-13      fs_1.5.0            
##  [67] vctrs_0.3.2          ellipsis_0.3.1       generics_0.0.2      
##  [70] rgdal_1.5-8          tools_4.0.2          munsell_0.5.0       
##  [73] compiler_4.0.2       pkgload_1.1.0        abind_1.4-5         
##  [76] tigris_0.9.4         sessioninfo_1.1.1    rms_6.0-1           
##  [79] visNetwork_2.0.9     jsonlite_1.7.0       WDI_2.6.0           
##  [82] scales_1.1.1         carData_3.0-4        estimability_1.3    
##  [85] lazyeval_0.2.2       car_3.0-8            latticeExtra_0.6-29 
##  [88] effectsize_0.3.2     reticulate_1.16      checkmate_2.0.0     
##  [91] rmarkdown_2.6        openxlsx_4.1.5       sandwich_2.5-1      
##  [94] statmod_1.4.34       webshot_0.5.2        pander_0.6.3        
##  [97] numDeriv_2016.8-1.1  yaml_2.2.1           systemfonts_0.2.3   
## [100] htmltools_0.5.0      memoise_1.1.0        quadprog_1.5-8      
## [103] viridisLite_0.3.0    jsonvalidate_1.1.0   digest_0.6.25       
## [106] assertthat_0.2.1     rappdirs_0.3.1       repr_1.1.0          
## [109] bayestestR_0.7.2     BiasedUrn_1.07       KMsurv_0.1-5        
## [112] units_0.6-6          remotes_2.2.0        blob_1.2.1          
## [115] labeling_0.3         deSolve_1.28         splines_4.0.2       
## [118] hms_0.5.3            rmapshaper_0.4.4     modelr_0.1.8        
## [121] colorspace_1.4-1     base64enc_0.1-3      Metrics_0.1.4       
## [124] nnet_7.3-14          mvtnorm_1.1-1        fansi_0.4.1         
## [127] conquer_1.0.1        truncnorm_1.0-8      R6_2.4.1            
## [130] grid_4.0.2           crul_0.9.0           lifecycle_0.2.0     
## [133] polspline_1.1.19     acepack_1.4.1        labelled_2.5.0      
## [136] StanHeaders_2.21.0-3 writexl_1.3          zip_2.1.1           
## [139] curl_4.3             geojsonlint_0.4.0    ggsignif_0.6.0      
## [142] pryr_0.1.4           minqa_1.2.4          testthat_2.3.2      
## [145] snakecase_0.11.0     desc_1.2.0           TH.data_1.0-10      
## [148] htmlwidgets_1.5.1    officer_0.3.13       crosstalk_1.1.0.1   
## [151] mgcv_1.8-31          rvest_0.3.6          insight_0.9.0       
## [154] htmlTable_2.0.1      codetools_0.2-16     prettyunits_1.1.1   
## [157] dbplyr_1.4.4         vegawidget_0.3.1     gtable_0.3.0        
## [160] DBI_1.1.0            stats4_4.0.2         etm_1.1             
## [163] highr_0.8            httr_1.4.2           KernSmooth_2.23-17  
## [166] farver_2.0.3         uuid_0.1-4           hexbin_1.28.1       
## [169] mice_3.11.0          xml2_1.3.2           ggeffects_0.15.1    
## [172] bit_1.1-15.2         sjstats_0.18.0       jpeg_0.1-8.1        
## [175] pkgconfig_2.0.3      maptools_1.0-1       rstatix_0.6.0       
## [178] mitools_2.4          HardyWeinberg_1.6.6  Rsolnp_1.16         
## [181] httpcode_0.3.0